/*
 * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "asm/macroAssembler.hpp"
#include "ci/ciEnv.hpp"
#include "code/nativeInst.hpp"
#include "compiler/disassembler.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/cardTable.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/cardTableBarrierSet.hpp"
#include "gc/shared/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/resourceArea.hpp"
#include "oops/accessDecorators.hpp"
#include "oops/klass.inline.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/interfaceSupport.inline.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"

// Implementation of AddressLiteral

void AddressLiteral::set_rspec(relocInfo::relocType rtype) {
  switch (rtype) {
  case relocInfo::oop_type:
    // Oops are a special case. Normally they would be their own section
    // but in cases like icBuffer they are literals in the code stream that
    // we don't have a section for. We use none so that we get a literal address
    // which is always patchable.
    break;
  case relocInfo::external_word_type:
    _rspec = external_word_Relocation::spec(_target);
    break;
  case relocInfo::internal_word_type:
    _rspec = internal_word_Relocation::spec(_target);
    break;
  case relocInfo::opt_virtual_call_type:
    _rspec = opt_virtual_call_Relocation::spec();
    break;
  case relocInfo::static_call_type:
    _rspec = static_call_Relocation::spec();
    break;
  case relocInfo::runtime_call_type:
    _rspec = runtime_call_Relocation::spec();
    break;
  case relocInfo::poll_type:
  case relocInfo::poll_return_type:
    _rspec = Relocation::spec_simple(rtype);
    break;
  case relocInfo::none:
    break;
  default:
    ShouldNotReachHere();
    break;
  }
}

// Initially added to the Assembler interface as a pure virtual:
//   RegisterConstant delayed_value(..)
// for:
//   6812678 macro assembler needs delayed binding of a few constants (for 6655638)
// this was subsequently modified to its present name and return type
RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
                                                      Register tmp,
                                                      int offset) {
  ShouldNotReachHere();
  return RegisterOrConstant(-1);
}


#ifdef AARCH64
// Note: ARM32 version is OS dependent
void MacroAssembler::breakpoint(AsmCondition cond) {
  if (cond == al) {
    brk();
  } else {
    Label L;
    b(L, inverse(cond));
    brk();
    bind(L);
  }
}
#endif // AARCH64


// virtual method calling
void MacroAssembler::lookup_virtual_method(Register recv_klass,
                                           Register vtable_index,
                                           Register method_result) {
  const int base_offset = in_bytes(Klass::vtable_start_offset()) + vtableEntry::method_offset_in_bytes();
  assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
  add(recv_klass, recv_klass, AsmOperand(vtable_index, lsl, LogBytesPerWord));
  ldr(method_result, Address(recv_klass, base_offset));
}
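
// In effect, the two instructions in lookup_virtual_method() above compute:
//   method_result = *(Method**)(recv_klass + vtable_start_offset
//                               + vtable_index * wordSize + method_offset);
// i.e. the Method* slot of the receiver's vtable at vtable_index.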


// Simplified, combined version, good for typical uses.
// Falls through on failure.
void MacroAssembler::check_klass_subtype(Register sub_klass,
                                         Register super_klass,
                                         Register temp_reg,
                                         Register temp_reg2,
                                         Register temp_reg3,
                                         Label& L_success) {
  Label L_failure;
  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, temp_reg2, &L_success, &L_failure, NULL);
  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, temp_reg2, temp_reg3, &L_success, NULL);
  bind(L_failure);
}

void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Register temp_reg2,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   Label* L_slow_path) {

  assert_different_registers(sub_klass, super_klass, temp_reg, temp_reg2, noreg);
  const Register super_check_offset = temp_reg2;

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  int sco_offset = in_bytes(Klass::super_check_offset_offset());
  Address super_check_offset_addr(super_klass, sco_offset);

  // If the pointers are equal, we are done (e.g., String[] elements).
  // This self-check enables sharing of secondary supertype arrays among
  // non-primary types such as array-of-interface.  Otherwise, each such
  // type would need its own customized SSA.
  // We move this check to the front of the fast path because many
  // type checks are in fact trivially successful in this manner,
  // so we get a nicely predicted branch right at the start of the check.
  cmp(sub_klass, super_klass);
  b(*L_success, eq);

  // Check the supertype display:
  ldr_u32(super_check_offset, super_check_offset_addr);

  Address super_check_addr(sub_klass, super_check_offset);
  ldr(temp_reg, super_check_addr); // load displayed supertype
  cmp(super_klass, temp_reg);

  // This check has worked decisively for primary supers.
  // Secondary supers are sought in the super_cache ('super_cache_addr').
  // (Secondary supers are interfaces and very deeply nested subtypes.)
  // This works in the same check above because of a tricky aliasing
  // between the super_cache and the primary super display elements.
  // (The 'super_check_addr' can address either, as the case requires.)
  // Note that the cache is updated below if it does not help us find
  // what we need immediately.
  // So if it was a primary super, we can just fail immediately.
  // Otherwise, it's the slow path for us (no success at this point).

  b(*L_success, eq);
  cmp_32(super_check_offset, sc_offset);
  if (L_failure == &L_fallthrough) {
    b(*L_slow_path, eq);
  } else {
    b(*L_failure, ne);
    if (L_slow_path != &L_fallthrough) {
      b(*L_slow_path);
    }
  }

  bind(L_fallthrough);
}
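
// In pseudocode, the fast path above is roughly:
//   if (sub_klass == super_klass)                    goto *L_success;   // trivial self-check
//   off = super_klass->super_check_offset;
//   if (*(Klass**)(sub_klass + off) == super_klass)  goto *L_success;   // display or cache hit
//   if (off == secondary_super_cache_offset)         goto *L_slow_path; // might still be a subtype
//   goto *L_failure;                                 // decisive miss on a primary super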


void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Register temp2_reg,
                                                   Register temp3_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   bool set_cond_codes) {
#ifdef AARCH64
  NOT_IMPLEMENTED();
#else
  // Note: if used by code that expects a register to be 0 on success,
  // this register must be temp_reg and set_cond_codes must be true

  Register saved_reg = noreg;

  // get additional tmp registers
  if (temp3_reg == noreg) {
    saved_reg = temp3_reg = LR;
    push(saved_reg);
  }

  assert(temp2_reg != noreg, "need all the temporary registers");
  assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, temp3_reg);

  Register cmp_temp = temp_reg;
  Register scan_temp = temp3_reg;
  Register count_temp = temp2_reg;

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  // a couple of useful fields in sub_klass:
  int ss_offset = in_bytes(Klass::secondary_supers_offset());
  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  Address secondary_supers_addr(sub_klass, ss_offset);
  Address super_cache_addr(     sub_klass, sc_offset);

#ifndef PRODUCT
  inc_counter((address)&SharedRuntime::_partial_subtype_ctr, scan_temp, count_temp);
#endif

  // We will consult the secondary-super array.
  ldr(scan_temp, Address(sub_klass, ss_offset));

  assert(! UseCompressedOops, "search_key must be the compressed super_klass");
  // else search_key would be the compressed super_klass
  Register search_key = super_klass;

  // Load the array length.
  ldr(count_temp, Address(scan_temp, Array<Klass*>::length_offset_in_bytes()));
  add(scan_temp, scan_temp, Array<Klass*>::base_offset_in_bytes());

  add(count_temp, count_temp, 1);

  Label L_loop, L_setnz_and_fail, L_fail;

  // Top of search loop
  bind(L_loop);
  // Notes:
  //  scan_temp starts at the array elements
  //  count_temp is 1+size
  subs(count_temp, count_temp, 1);
  if ((L_failure != &L_fallthrough) && (! set_cond_codes) && (saved_reg == noreg)) {
    // direct jump to L_failure if failed and no cleanup needed
    b(*L_failure, eq); // not found and no cleanup needed
  } else {
    b(L_fail, eq); // not found in the array
  }

  // Load next super to check
  // In the array of super classes elements are pointer sized.
  int element_size = wordSize;
  ldr(cmp_temp, Address(scan_temp, element_size, post_indexed));

  // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
  subs(cmp_temp, cmp_temp, search_key);

  // A miss means we are NOT a subtype and need to keep looping
  b(L_loop, ne);

  // Falling out the bottom means we found a hit; we ARE a subtype

  // Note: temp_reg/cmp_temp is already 0 and flag Z is set

  // Success.  Cache the super we found and proceed in triumph.
  str(super_klass, Address(sub_klass, sc_offset));

  if (saved_reg != noreg) {
    // Return success
    pop(saved_reg);
  }

  b(*L_success);

  bind(L_fail);
  // Note1: check "b(*L_failure, eq)" above if adding extra instructions here
  if (set_cond_codes) {
    movs(temp_reg, sub_klass); // clears Z and sets temp_reg to non-0 if needed
  }
  if (saved_reg != noreg) {
    pop(saved_reg);
  }
  if (L_failure != &L_fallthrough) {
    b(*L_failure);
  }

  bind(L_fallthrough);
#endif
}
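
// The scan above is a linear search of sub_klass->secondary_supers, roughly:
//   for (i = 0; i < length; i++) {
//     if (secondary_supers[i] == super_klass) {
//       sub_klass->secondary_super_cache = super_klass;  // remember the hit
//       goto *L_success;
//     }
//   }
//   goto *L_failure;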

// Returns address of receiver parameter, using tmp as base register. tmp and params_count can be the same.
Address MacroAssembler::receiver_argument_address(Register params_base, Register params_count, Register tmp) {
  assert_different_registers(params_base, params_count);
  add(tmp, params_base, AsmOperand(params_count, lsl, Interpreter::logStackElementSize));
  return Address(tmp, -Interpreter::stackElementSize);
}


void MacroAssembler::align(int modulus) {
  while (offset() % modulus != 0) {
    nop();
  }
}

int MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                        Register last_java_fp,
                                        bool save_last_java_pc,
                                        Register tmp) {
  int pc_offset;
  if (last_java_fp != noreg) {
    // optional
    str(last_java_fp, Address(Rthread, JavaThread::last_Java_fp_offset()));
    _fp_saved = true;
  } else {
    _fp_saved = false;
  }
  if (AARCH64_ONLY(true) NOT_AARCH64(save_last_java_pc)) { // optional on 32-bit ARM
#ifdef AARCH64
    pc_offset = mov_pc_to(tmp);
    str(tmp, Address(Rthread, JavaThread::last_Java_pc_offset()));
#else
    str(PC, Address(Rthread, JavaThread::last_Java_pc_offset()));
    pc_offset = offset() + VM_Version::stored_pc_adjustment();
#endif
    _pc_saved = true;
  } else {
    _pc_saved = false;
    pc_offset = -1;
  }
  // According to the comment in javaFrameAnchor, SP must be saved last, so that other
  // entries are valid when SP is set.

  // However, this is probably not a strong constraint since, for instance, PC is
  // sometimes read from the stack at SP... but is pushed later (by the call). Hence,
  // we now write the fields in the expected order but we have not added a StoreStore
  // barrier.

  // XXX: if the ordering is really important, PC should always be saved (without forgetting
  // to update oop_map offsets) and a StoreStore barrier might be needed.

  if (last_java_sp == noreg) {
    last_java_sp = SP; // always saved
  }
#ifdef AARCH64
  if (last_java_sp == SP) {
    mov(tmp, SP);
    str(tmp, Address(Rthread, JavaThread::last_Java_sp_offset()));
  } else {
    str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
  }
#else
  str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
#endif

  return pc_offset; // for oopmaps
}

void MacroAssembler::reset_last_Java_frame(Register tmp) {
  const Register Rzero = zero_register(tmp);
  str(Rzero, Address(Rthread, JavaThread::last_Java_sp_offset()));
  if (_fp_saved) {
    str(Rzero, Address(Rthread, JavaThread::last_Java_fp_offset()));
  }
  if (_pc_saved) {
    str(Rzero, Address(Rthread, JavaThread::last_Java_pc_offset()));
  }
}


// Implementation of call_VM versions

void MacroAssembler::call_VM_leaf_helper(address entry_point, int number_of_arguments) {
  assert(number_of_arguments >= 0, "cannot have negative number of arguments");
  assert(number_of_arguments <= 4, "cannot have more than 4 arguments");

#ifndef AARCH64
  // Safer to save R9 here since callers may have been written
  // assuming R9 survives. This is suboptimal but is not worth
  // optimizing for the few platforms where R9 is scratched.
  push(RegisterSet(R4) | R9ifScratched);
  mov(R4, SP);
  bic(SP, SP, StackAlignmentInBytes - 1);
#endif // AARCH64
  call(entry_point, relocInfo::runtime_call_type);
#ifndef AARCH64
  mov(SP, R4);
  pop(RegisterSet(R4) | R9ifScratched);
#endif // AARCH64
}


void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
  assert(number_of_arguments >= 0, "cannot have negative number of arguments");
  assert(number_of_arguments <= 3, "cannot have more than 3 arguments");

  const Register tmp = Rtemp;
  assert_different_registers(oop_result, tmp);

  set_last_Java_frame(SP, FP, true, tmp);

#ifdef ASSERT
  AARCH64_ONLY(if (UseCompressedOops || UseCompressedClassPointers) { verify_heapbase("call_VM_helper: heap base corrupted?"); });
#endif // ASSERT

#ifndef AARCH64
#if R9_IS_SCRATCHED
  // Safer to save R9 here since callers may have been written
  // assuming R9 survives. This is suboptimal but is not worth
  // optimizing for the few platforms where R9 is scratched.

  // Note: cannot save R9 above the saved SP (some calls expect for
  // instance the Java stack top at the saved SP)
  // => once saved (with set_last_Java_frame), decrease SP before rounding to
  // ensure the slot at SP will be free for R9).
  sub(SP, SP, 4);
  bic(SP, SP, StackAlignmentInBytes - 1);
  str(R9, Address(SP, 0));
#else
  bic(SP, SP, StackAlignmentInBytes - 1);
#endif // R9_IS_SCRATCHED
#endif

  mov(R0, Rthread);
  call(entry_point, relocInfo::runtime_call_type);

#ifndef AARCH64
#if R9_IS_SCRATCHED
  ldr(R9, Address(SP, 0));
#endif
  ldr(SP, Address(Rthread, JavaThread::last_Java_sp_offset()));
#endif

  reset_last_Java_frame(tmp);

  // C++ interp handles this in the interpreter
  check_and_handle_popframe();
  check_and_handle_earlyret();

  if (check_exceptions) {
    // check for pending exceptions
    ldr(tmp, Address(Rthread, Thread::pending_exception_offset()));
#ifdef AARCH64
    Label L;
    cbz(tmp, L);
    mov_pc_to(Rexception_pc);
    b(StubRoutines::forward_exception_entry());
    bind(L);
#else
    cmp(tmp, 0);
    mov(Rexception_pc, PC, ne);
    b(StubRoutines::forward_exception_entry(), ne);
#endif // AARCH64
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
    get_vm_result(oop_result, tmp);
  }
}

void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) {
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
}


void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) {
  assert (arg_1 == R1, "fixed register for arg_1");
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
}


void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
  assert (arg_1 == R1, "fixed register for arg_1");
  assert (arg_2 == R2, "fixed register for arg_2");
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
}


void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
  assert (arg_1 == R1, "fixed register for arg_1");
  assert (arg_2 == R2, "fixed register for arg_2");
  assert (arg_3 == R3, "fixed register for arg_3");
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
}


void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments, bool check_exceptions) {
  // Not used on ARM
  Unimplemented();
}


void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) {
  // Not used on ARM
  Unimplemented();
}


void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
  // Not used on ARM
  Unimplemented();
}


void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
  // Not used on ARM
  Unimplemented();
}

// Raw call, without saving/restoring registers, exception handling, etc.
// Mainly used from various stubs.
void MacroAssembler::call_VM(address entry_point, bool save_R9_if_scratched) {
  const Register tmp = Rtemp; // Rtemp free since scratched by call
  set_last_Java_frame(SP, FP, true, tmp);
#if R9_IS_SCRATCHED
  if (save_R9_if_scratched) {
    // Note: Saving also R10 for alignment.
    push(RegisterSet(R9, R10));
  }
#endif
  mov(R0, Rthread);
  call(entry_point, relocInfo::runtime_call_type);
#if R9_IS_SCRATCHED
  if (save_R9_if_scratched) {
    pop(RegisterSet(R9, R10));
  }
#endif
  reset_last_Java_frame(tmp);
}

void MacroAssembler::call_VM_leaf(address entry_point) {
  call_VM_leaf_helper(entry_point, 0);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) {
  assert (arg_1 == R0, "fixed register for arg_1");
  call_VM_leaf_helper(entry_point, 1);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) {
  assert (arg_1 == R0, "fixed register for arg_1");
  assert (arg_2 == R1, "fixed register for arg_2");
  call_VM_leaf_helper(entry_point, 2);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
  assert (arg_1 == R0, "fixed register for arg_1");
  assert (arg_2 == R1, "fixed register for arg_2");
  assert (arg_3 == R2, "fixed register for arg_3");
  call_VM_leaf_helper(entry_point, 3);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4) {
  assert (arg_1 == R0, "fixed register for arg_1");
  assert (arg_2 == R1, "fixed register for arg_2");
  assert (arg_3 == R2, "fixed register for arg_3");
  assert (arg_4 == R3, "fixed register for arg_4");
  call_VM_leaf_helper(entry_point, 4);
}

void MacroAssembler::get_vm_result(Register oop_result, Register tmp) {
  assert_different_registers(oop_result, tmp);
  ldr(oop_result, Address(Rthread, JavaThread::vm_result_offset()));
  str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_offset()));
  verify_oop(oop_result);
}

void MacroAssembler::get_vm_result_2(Register metadata_result, Register tmp) {
  assert_different_registers(metadata_result, tmp);
  ldr(metadata_result, Address(Rthread, JavaThread::vm_result_2_offset()));
  str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_2_offset()));
}

void MacroAssembler::add_rc(Register dst, Register arg1, RegisterOrConstant arg2) {
  if (arg2.is_register()) {
    add(dst, arg1, arg2.as_register());
  } else {
    add(dst, arg1, arg2.as_constant());
  }
}

void MacroAssembler::add_slow(Register rd, Register rn, int c) {
#ifdef AARCH64
  if (c == 0) {
    if (rd != rn) {
      mov(rd, rn);
    }
    return;
  }
  if (c < 0) {
    sub_slow(rd, rn, -c);
    return;
  }
  if (c > right_n_bits(24)) {
    guarantee(rd != rn, "no large add_slow with only one register");
    mov_slow(rd, c);
    add(rd, rn, rd);
  } else {
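    // Split the constant into two 12-bit immediates; e.g. c = 0x123456 becomes:
    //   add(rd, rn, 0x456);          // low 12 bits
    //   add(rd, rd, 0x123, lsl12);   // high 12 bits, shifted left by 12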
    int lo = c & right_n_bits(12);
    int hi = (c >> 12) & right_n_bits(12);
    if (lo != 0) {
      add(rd, rn, lo, lsl0);
    }
    if (hi != 0) {
      add(rd, (lo == 0) ? rn : rd, hi, lsl12);
    }
  }
#else
  // This function is used in compiler for handling large frame offsets
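  // Example: add_slow(rd, rn, 0x1234) emits
  //   add(rd, rn, 0x234);   // low part, c & 0x3fc
  //   add(rd, rd, 0x1000);  // remaining part, c & ~0x3fc (a valid rotated immediate)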
  if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
    return sub(rd, rn, (-c));
  }
  int low = c & 0x3fc;
  if (low != 0) {
    add(rd, rn, low);
    rn = rd;
  }
  if (c & ~0x3fc) {
    assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported add_slow offset %d", c);
    add(rd, rn, c & ~0x3fc);
  } else if (rd != rn) {
    assert(c == 0, "");
    mov(rd, rn); // need to generate at least one move!
  }
#endif // AARCH64
}

void MacroAssembler::sub_slow(Register rd, Register rn, int c) {
#ifdef AARCH64
  if (c <= 0) {
    add_slow(rd, rn, -c);
    return;
  }
  if (c > right_n_bits(24)) {
    guarantee(rd != rn, "no large sub_slow with only one register");
    mov_slow(rd, c);
    sub(rd, rn, rd);
  } else {
    int lo = c & right_n_bits(12);
    int hi = (c >> 12) & right_n_bits(12);
    if (lo != 0) {
      sub(rd, rn, lo, lsl0);
    }
    if (hi != 0) {
      sub(rd, (lo == 0) ? rn : rd, hi, lsl12);
    }
  }
#else
  // This function is used in compiler for handling large frame offsets
  if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
    return add(rd, rn, (-c));
  }
  int low = c & 0x3fc;
  if (low != 0) {
    sub(rd, rn, low);
    rn = rd;
  }
  if (c & ~0x3fc) {
    assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported sub_slow offset %d", c);
    sub(rd, rn, c & ~0x3fc);
  } else if (rd != rn) {
    assert(c == 0, "");
    mov(rd, rn); // need to generate at least one move!
  }
#endif // AARCH64
}

void MacroAssembler::mov_slow(Register rd, address addr) {
  // do *not* call the non relocated mov_related_address
  mov_slow(rd, (intptr_t)addr);
}

void MacroAssembler::mov_slow(Register rd, const char *str) {
  mov_slow(rd, (intptr_t)str);
}

#ifdef AARCH64

// Common code for mov_slow and instr_count_for_mov_slow.
// Returns number of instructions of mov_slow pattern,
// generating it if non-null MacroAssembler is given.
int MacroAssembler::mov_slow_helper(Register rd, intptr_t c, MacroAssembler* masm) {
  // This code pattern is matched in NativeInstruction::is_mov_slow.
  // Update it at modifications.
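  // Examples of the patterns produced below:
  //   c = 0x12340000     -> movz rd, #0x1234, lsl #16
  //   c = -2             -> movn rd, #1, lsl #0
  //   c = 0x123400005678 -> movz rd, #0x5678, lsl #0
  //                         movk rd, #0x1234, lsl #32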

  const intx mask = right_n_bits(16);
  // 1 movz instruction
  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
    if ((c & ~(mask << base_shift)) == 0) {
      if (masm != NULL) {
        masm->movz(rd, ((uintx)c) >> base_shift, base_shift);
      }
      return 1;
    }
  }
  // 1 movn instruction
  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
    if (((~c) & ~(mask << base_shift)) == 0) {
      if (masm != NULL) {
        masm->movn(rd, ((uintx)(~c)) >> base_shift, base_shift);
      }
      return 1;
    }
  }
  // 1 orr instruction
  {
    LogicalImmediate imm(c, false);
    if (imm.is_encoded()) {
      if (masm != NULL) {
        masm->orr(rd, ZR, imm);
      }
      return 1;
    }
  }
  // 1 movz/movn + up to 3 movk instructions
  int zeroes = 0;
  int ones = 0;
  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
    int part = (c >> base_shift) & mask;
    if (part == 0) {
      ++zeroes;
    } else if (part == mask) {
      ++ones;
    }
  }
  int def_bits = 0;
  if (ones > zeroes) {
    def_bits = mask;
  }
  int inst_count = 0;
  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
    int part = (c >> base_shift) & mask;
    if (part != def_bits) {
      if (masm != NULL) {
        if (inst_count > 0) {
          masm->movk(rd, part, base_shift);
        } else {
          if (def_bits == 0) {
            masm->movz(rd, part, base_shift);
          } else {
            masm->movn(rd, ~part & mask, base_shift);
          }
        }
      }
      inst_count++;
    }
  }
  assert((1 <= inst_count) && (inst_count <= 4), "incorrect number of instructions");
  return inst_count;
}

void MacroAssembler::mov_slow(Register rd, intptr_t c) {
#ifdef ASSERT
  int off = offset();
#endif
  (void) mov_slow_helper(rd, c, this);
  assert(offset() - off == instr_count_for_mov_slow(c) * InstructionSize, "size mismatch");
}

// Counts instructions generated by mov_slow(rd, c).
int MacroAssembler::instr_count_for_mov_slow(intptr_t c) {
  return mov_slow_helper(noreg, c, NULL);
}

int MacroAssembler::instr_count_for_mov_slow(address c) {
  return mov_slow_helper(noreg, (intptr_t)c, NULL);
}

#else

void MacroAssembler::mov_slow(Register rd, intptr_t c, AsmCondition cond) {
  if (AsmOperand::is_rotated_imm(c)) {
    mov(rd, c, cond);
  } else if (AsmOperand::is_rotated_imm(~c)) {
    mvn(rd, ~c, cond);
  } else if (VM_Version::supports_movw()) {
    movw(rd, c & 0xffff, cond);
    if ((unsigned int)c >> 16) {
      movt(rd, (unsigned int)c >> 16, cond);
    }
  } else {
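    // Example: c = 0x12345678 is assembled from four 8-bit chunks, each a
    // valid rotated immediate:
    //   mov(rd, 0x278);           // bits [2:9]  (first non-zero bit pair is at shift 2)
    //   orr(rd, rd, 0x5400);      // bits [10:17]
    //   orr(rd, rd, 0x2340000);   // bits [18:25]
    //   orr(rd, rd, 0x10000000);  // bits [26:31]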
    // Find first non-zero bit
    int shift = 0;
    while ((c & (3 << shift)) == 0) {
      shift += 2;
    }
    // Put the least significant part of the constant
    int mask = 0xff << shift;
    mov(rd, c & mask, cond);
    // Add up to 3 other parts of the constant;
    // each of them can be represented as rotated_imm
    if (c & (mask << 8)) {
      orr(rd, rd, c & (mask << 8), cond);
    }
    if (c & (mask << 16)) {
      orr(rd, rd, c & (mask << 16), cond);
    }
    if (c & (mask << 24)) {
      orr(rd, rd, c & (mask << 24), cond);
    }
  }
}

#endif // AARCH64

void MacroAssembler::mov_oop(Register rd, jobject o, int oop_index,
#ifdef AARCH64
                             bool patchable
#else
                             AsmCondition cond
#endif
                             ) {

  if (o == NULL) {
#ifdef AARCH64
    if (patchable) {
      nop();
    }
    mov(rd, ZR);
#else
    mov(rd, 0, cond);
#endif
    return;
  }

  if (oop_index == 0) {
    oop_index = oop_recorder()->allocate_oop_index(o);
  }
  relocate(oop_Relocation::spec(oop_index));

#ifdef AARCH64
  if (patchable) {
    nop();
  }
  ldr(rd, pc());
#else
  if (VM_Version::supports_movw()) {
    movw(rd, 0, cond);
    movt(rd, 0, cond);
  } else {
    ldr(rd, Address(PC), cond);
    // Extra nop to handle case of large offset of oop placeholder (see NativeMovConstReg::set_data).
    nop();
  }
#endif
}

void MacroAssembler::mov_metadata(Register rd, Metadata* o, int metadata_index AARCH64_ONLY_ARG(bool patchable)) {
  if (o == NULL) {
#ifdef AARCH64
    if (patchable) {
      nop();
    }
#endif
    mov(rd, 0);
    return;
  }

  if (metadata_index == 0) {
    metadata_index = oop_recorder()->allocate_metadata_index(o);
  }
  relocate(metadata_Relocation::spec(metadata_index));

#ifdef AARCH64
  if (patchable) {
    nop();
  }
#ifdef COMPILER2
  if (!patchable && VM_Version::prefer_moves_over_load_literal()) {
    mov_slow(rd, (address)o);
    return;
  }
#endif
  ldr(rd, pc());
#else
  if (VM_Version::supports_movw()) {
    movw(rd, ((int)o) & 0xffff);
    movt(rd, (unsigned int)o >> 16);
  } else {
    ldr(rd, Address(PC));
    // Extra nop to handle case of large offset of metadata placeholder (see NativeMovConstReg::set_data).
    nop();
  }
#endif // AARCH64
}

void MacroAssembler::mov_float(FloatRegister fd, jfloat c NOT_AARCH64_ARG(AsmCondition cond)) {
  Label skip_constant;
  union {
    jfloat f;
    jint i;
  } accessor;
  accessor.f = c;

#ifdef AARCH64
  // TODO-AARCH64 - try to optimize loading of float constants with fmov and/or mov_slow
  Label L;
  ldr_s(fd, target(L));
  b(skip_constant);
  bind(L);
  emit_int32(accessor.i);
  bind(skip_constant);
#else
  flds(fd, Address(PC), cond);
  b(skip_constant);
  emit_int32(accessor.i);
  bind(skip_constant);
#endif // AARCH64
}

void MacroAssembler::mov_double(FloatRegister fd, jdouble c NOT_AARCH64_ARG(AsmCondition cond)) {
  Label skip_constant;
  union {
    jdouble d;
    jint i[2];
  } accessor;
  accessor.d = c;

#ifdef AARCH64
  // TODO-AARCH64 - try to optimize loading of double constants with fmov
  Label L;
  ldr_d(fd, target(L));
  b(skip_constant);
  align(wordSize);
  bind(L);
  emit_int32(accessor.i[0]);
  emit_int32(accessor.i[1]);
  bind(skip_constant);
#else
  fldd(fd, Address(PC), cond);
  b(skip_constant);
  emit_int32(accessor.i[0]);
  emit_int32(accessor.i[1]);
  bind(skip_constant);
#endif // AARCH64
}

void MacroAssembler::ldr_global_s32(Register reg, address address_of_global) {
  intptr_t addr = (intptr_t) address_of_global;
#ifdef AARCH64
  assert((addr & 0x3) == 0, "address should be aligned");

  // FIXME: TODO
  if (false && page_reachable_from_cache(address_of_global)) {
    assert(false, "TODO: relocate");
    //relocate();
    adrp(reg, address_of_global);
    ldrsw(reg, Address(reg, addr & 0xfff));
  } else {
    mov_slow(reg, addr & ~0x3fff);
    ldrsw(reg, Address(reg, addr & 0x3fff));
  }
#else
  mov_slow(reg, addr & ~0xfff);
  ldr(reg, Address(reg, addr & 0xfff));
#endif
}

void MacroAssembler::ldr_global_ptr(Register reg, address address_of_global) {
#ifdef AARCH64
  intptr_t addr = (intptr_t) address_of_global;
  assert((addr & 0x7) == 0, "address should be aligned");
  mov_slow(reg, addr & ~0x7fff);
  ldr(reg, Address(reg, addr & 0x7fff));
#else
  ldr_global_s32(reg, address_of_global);
#endif
}

void MacroAssembler::ldrb_global(Register reg, address address_of_global) {
  intptr_t addr = (intptr_t) address_of_global;
  mov_slow(reg, addr & ~0xfff);
  ldrb(reg, Address(reg, addr & 0xfff));
}

void MacroAssembler::zero_extend(Register rd, Register rn, int bits) {
#ifdef AARCH64
  switch (bits) {
    case  8: uxtb(rd, rn); break;
    case 16: uxth(rd, rn); break;
    case 32: mov_w(rd, rn); break;
    default: ShouldNotReachHere();
  }
#else
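  // e.g. zero_extend(rd, rn, 12) emits:
  //   mov(rd, AsmOperand(rn, lsl, 20));  // push bits [0:11] to the top
  //   mov(rd, AsmOperand(rd, lsr, 20));  // shift back down, clearing bits [12:31]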
  if (bits <= 8) {
    andr(rd, rn, (1 << bits) - 1);
  } else if (bits >= 24) {
    bic(rd, rn, -1 << bits);
  } else {
    mov(rd, AsmOperand(rn, lsl, 32 - bits));
    mov(rd, AsmOperand(rd, lsr, 32 - bits));
  }
#endif
}

void MacroAssembler::sign_extend(Register rd, Register rn, int bits) {
#ifdef AARCH64
  switch (bits) {
    case  8: sxtb(rd, rn); break;
    case 16: sxth(rd, rn); break;
    case 32: sxtw(rd, rn); break;
    default: ShouldNotReachHere();
  }
#else
  mov(rd, AsmOperand(rn, lsl, 32 - bits));
  mov(rd, AsmOperand(rd, asr, 32 - bits));
#endif
}

#ifndef AARCH64

void MacroAssembler::long_move(Register rd_lo, Register rd_hi,
                               Register rn_lo, Register rn_hi,
                               AsmCondition cond) {
  if (rd_lo != rn_hi) {
    if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
    if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
  } else if (rd_hi != rn_lo) {
    if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
    if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
  } else {
    eor(rd_lo, rd_hi, rd_lo, cond);
    eor(rd_hi, rd_lo, rd_hi, cond);
    eor(rd_lo, rd_hi, rd_lo, cond);
  }
}

void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
                                Register rn_lo, Register rn_hi,
                                AsmShift shift, Register count) {
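  // 64-bit shift of a 32-bit register pair by a variable count in [0..63].
  // For lsl the sequence below computes, in pseudocode:
  //   if (count >= 32) rd_hi = rn_lo << (count - 32);
  //   else             rd_hi = (rn_hi << count) | (rn_lo >> (32 - count));
  //   rd_lo = rn_lo << count;  // a register-specified lsl by 32 or more yields 0
  // and symmetrically (lsr/asr) for the right shifts.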
  Register tmp;
  if (rd_lo != rn_lo && rd_lo != rn_hi && rd_lo != count) {
    tmp = rd_lo;
  } else {
    tmp = rd_hi;
  }
  assert_different_registers(tmp, count, rn_lo, rn_hi);

  subs(tmp, count, 32);
  if (shift == lsl) {
    assert_different_registers(rd_hi, rn_lo);
    assert_different_registers(count, rd_hi);
    mov(rd_hi, AsmOperand(rn_lo, shift, tmp), pl);
    rsb(tmp, count, 32, mi);
    if (rd_hi == rn_hi) {
      mov(rd_hi, AsmOperand(rn_hi, lsl, count), mi);
      orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
    } else {
      mov(rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
      orr(rd_hi, rd_hi, AsmOperand(rn_hi, lsl, count), mi);
    }
    mov(rd_lo, AsmOperand(rn_lo, shift, count));
  } else {
    assert_different_registers(rd_lo, rn_hi);
    assert_different_registers(rd_lo, count);
    mov(rd_lo, AsmOperand(rn_hi, shift, tmp), pl);
    rsb(tmp, count, 32, mi);
    if (rd_lo == rn_lo) {
      mov(rd_lo, AsmOperand(rn_lo, lsr, count), mi);
      orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
    } else {
      mov(rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
      orr(rd_lo, rd_lo, AsmOperand(rn_lo, lsr, count), mi);
    }
    mov(rd_hi, AsmOperand(rn_hi, shift, count));
  }
}

void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
                                Register rn_lo, Register rn_hi,
                                AsmShift shift, int count) {
  assert(count != 0 && (count & ~63) == 0, "must be");

  if (shift == lsl) {
    assert_different_registers(rd_hi, rn_lo);
    if (count >= 32) {
      mov(rd_hi, AsmOperand(rn_lo, lsl, count - 32));
      mov(rd_lo, 0);
    } else {
      mov(rd_hi, AsmOperand(rn_hi, lsl, count));
      orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, 32 - count));
      mov(rd_lo, AsmOperand(rn_lo, lsl, count));
    }
  } else {
    assert_different_registers(rd_lo, rn_hi);
    if (count >= 32) {
      if (count == 32) {
        mov(rd_lo, rn_hi);
      } else {
        mov(rd_lo, AsmOperand(rn_hi, shift, count - 32));
      }
      if (shift == asr) {
        mov(rd_hi, AsmOperand(rn_hi, asr, 0));
      } else {
        mov(rd_hi, 0);
      }
    } else {
      mov(rd_lo, AsmOperand(rn_lo, lsr, count));
      orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, 32 - count));
      mov(rd_hi, AsmOperand(rn_hi, shift, count));
    }
  }
}
#endif // !AARCH64

void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
  // This code pattern is matched in NativeInstruction::skip_verify_oop.
  // Update it at modifications.
  if (!VerifyOops) return;

  char buffer[64];
#ifdef COMPILER1
  if (CommentedAssembly) {
    snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset());
    block_comment(buffer);
  }
#endif
  const char* msg_buffer = NULL;
  {
    ResourceMark rm;
    stringStream ss;
    ss.print("%s at offset %d (%s:%d)", s, offset(), file, line);
    msg_buffer = code_string(ss.as_string());
  }

  save_all_registers();

  if (reg != R2) {
    mov(R2, reg);  // oop to verify
  }
  mov(R1, SP);     // register save area

  Label done;
  InlinedString Lmsg(msg_buffer);
  ldr_literal(R0, Lmsg); // message

  // call indirectly to solve generation ordering problem
  ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
  call(Rtemp);

  restore_all_registers();

  b(done);
#ifdef COMPILER2
  int off = offset();
#endif
  bind_literal(Lmsg);
#ifdef COMPILER2
  if (offset() - off == 1 * wordSize) {
    // no padding, so insert nop for worst-case sizing
    nop();
  }
#endif
  bind(done);
}

void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
  if (!VerifyOops) return;

  const char* msg_buffer = NULL;
  {
    ResourceMark rm;
    stringStream ss;
    if ((addr.base() == SP) && (addr.index() == noreg)) {
      ss.print("verify_oop_addr SP[%d]: %s", (int)addr.disp(), s);
    } else {
      ss.print("verify_oop_addr: %s", s);
    }
    ss.print(" (%s:%d)", file, line);
    msg_buffer = code_string(ss.as_string());
  }

  int push_size = save_all_registers();

  if (addr.base() == SP) {
    // computes an addr that takes into account the push
    if (addr.index() != noreg) {
      Register new_base = addr.index() == R2 ? R1 : R2; // avoid corrupting the index
      add(new_base, SP, push_size);
      addr = addr.rebase(new_base);
    } else {
      addr = addr.plus_disp(push_size);
    }
  }

  ldr(R2, addr); // oop to verify
  mov(R1, SP);   // register save area

  Label done;
  InlinedString Lmsg(msg_buffer);
  ldr_literal(R0, Lmsg); // message

  // call indirectly to solve generation ordering problem
  ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
  call(Rtemp);

  restore_all_registers();

  b(done);
  bind_literal(Lmsg);
  bind(done);
}

void MacroAssembler::c2bool(Register x) {
  tst(x, 0xff); // Only look at the lowest byte
#ifdef AARCH64
  cset(x, ne);
#else
  mov(x, 1, ne);
#endif
}

void MacroAssembler::null_check(Register reg, Register tmp, int offset) {
  if (needs_explicit_null_check(offset)) {
#ifdef AARCH64
    ldr(ZR, Address(reg));
#else
    assert_different_registers(reg, tmp);
    if (tmp == noreg) {
      tmp = Rtemp;
      assert((! Thread::current()->is_Compiler_thread()) ||
             (! (ciEnv::current()->task() == NULL)) ||
             (! (ciEnv::current()->comp_level() == CompLevel_full_optimization)),
             "Rtemp not available in C2"); // explicit tmp register required
      // XXX: could we mark the code buffer as not compatible with C2 ?
    }
    ldr(tmp, Address(reg));
#endif
  }
}

// Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
void MacroAssembler::eden_allocate(Register obj, Register obj_end, Register tmp1, Register tmp2,
                                   RegisterOrConstant size_expression, Label& slow_case) {
  if (!Universe::heap()->supports_inline_contig_alloc()) {
    b(slow_case);
    return;
  }

  CollectedHeap* ch = Universe::heap();

  const Register top_addr = tmp1;
  const Register heap_end = tmp2;

  if (size_expression.is_register()) {
    assert_different_registers(obj, obj_end, top_addr, heap_end, size_expression.as_register());
  } else {
    assert_different_registers(obj, obj_end, top_addr, heap_end);
  }

  bool load_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw()); // TODO-AARCH64 check performance
  if (load_const) {
    mov_address(top_addr, (address)Universe::heap()->top_addr(), symbolic_Relocation::eden_top_reference);
  } else {
    ldr(top_addr, Address(Rthread, JavaThread::heap_top_addr_offset()));
  }
  // Calculate new heap_top by adding the size of the object
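  // The allocation is a classic CAS loop over the shared heap top; in pseudocode:
  //   do {
  //     obj = *top_addr;
  //     obj_end = obj + size;    // overflow or obj_end > heap_end -> slow_case
  //   } while (!CAS(top_addr, obj, obj_end));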
  Label retry;
  bind(retry);

#ifdef AARCH64
  ldxr(obj, top_addr);
#else
  ldr(obj, Address(top_addr));
#endif // AARCH64

  ldr(heap_end, Address(top_addr, (intptr_t)ch->end_addr() - (intptr_t)ch->top_addr()));
  add_rc(obj_end, obj, size_expression);
  // Check if obj_end wrapped around, i.e., obj_end < obj. If yes, jump to the slow case.
  cmp(obj_end, obj);
  b(slow_case, lo);
  // Update heap_top if allocation succeeded
  cmp(obj_end, heap_end);
  b(slow_case, hi);

#ifdef AARCH64
  stxr(heap_end/*scratched*/, obj_end, top_addr);
  cbnz_w(heap_end, retry);
#else
  atomic_cas_bool(obj, obj_end, top_addr, 0, heap_end/*scratched*/);
  b(retry, ne);
#endif // AARCH64
}

// Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
void MacroAssembler::tlab_allocate(Register obj, Register obj_end, Register tmp1,
                                   RegisterOrConstant size_expression, Label& slow_case) {
  const Register tlab_end = tmp1;
  assert_different_registers(obj, obj_end, tlab_end);

  ldr(obj, Address(Rthread, JavaThread::tlab_top_offset()));
  ldr(tlab_end, Address(Rthread, JavaThread::tlab_end_offset()));
  add_rc(obj_end, obj, size_expression);
  cmp(obj_end, tlab_end);
  b(slow_case, hi);
  str(obj_end, Address(Rthread, JavaThread::tlab_top_offset()));
}

// Fills memory regions [start..end] with zeroes. Clobbers `start` and `tmp` registers.
void MacroAssembler::zero_memory(Register start, Register end, Register tmp) {
  Label loop;
  const Register ptr = start;

#ifdef AARCH64
  // TODO-AARCH64 - compare performance of 2x word zeroing with simple 1x
  const Register size = tmp;
  Label remaining, done;

  sub(size, end, start);

#ifdef ASSERT
  { Label L;
    tst(size, wordSize - 1);
    b(L, eq);
    stop("size is not a multiple of wordSize");
    bind(L);
  }
#endif // ASSERT

  subs(size, size, wordSize);
  b(remaining, le);

  // Zero by 2 words per iteration.
  bind(loop);
  subs(size, size, 2*wordSize);
  stp(ZR, ZR, Address(ptr, 2*wordSize, post_indexed));
  b(loop, gt);

  bind(remaining);
  b(done, ne);
  str(ZR, Address(ptr));
  bind(done);
#else
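  // 32-bit: a simple word-at-a-time store loop; in pseudocode:
  //   while (ptr < end) *ptr++ = 0;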
  mov(tmp, 0);
  bind(loop);
  cmp(ptr, end);
  str(tmp, Address(ptr, wordSize, post_indexed), lo);
  b(loop, lo);
#endif // AARCH64
}

void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register tmp) {
#ifdef AARCH64
  ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
  add_rc(tmp, tmp, size_in_bytes);
  str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
#else
  // Bump total bytes allocated by this thread
  Label done;

  // Borrow the Rthread for alloc counter
  Register Ralloc = Rthread;
  add(Ralloc, Ralloc, in_bytes(JavaThread::allocated_bytes_offset()));
  ldr(tmp, Address(Ralloc));
  adds(tmp, tmp, size_in_bytes);
  str(tmp, Address(Ralloc), cc);
  b(done, cc);

  // Increment the high word and store single-copy atomically (an unlikely
  // scenario on typical embedded systems, as it means more than 4GB has been
  // allocated). ldrd/strd instructions are used for this, and they require an
  // even/odd pair of registers. Such a request could be difficult to satisfy
  // by allocating those registers at a higher level, therefore the routine is
  // ready to allocate a pair itself.
  Register low, high;
  // Select either R0/R1 or R2/R3

  if (size_in_bytes.is_register() && (size_in_bytes.as_register() == R0 || size_in_bytes.as_register() == R1)) {
    low = R2;
    high = R3;
  } else {
    low = R0;
    high = R1;
  }
  push(RegisterSet(low, high));

  ldrd(low, Address(Ralloc));
  adds(low, low, size_in_bytes);
  adc(high, high, 0);
  strd(low, Address(Ralloc));

  pop(RegisterSet(low, high));

  bind(done);

  // Unborrow the Rthread
  sub(Rthread, Ralloc, in_bytes(JavaThread::allocated_bytes_offset()));
#endif // AARCH64
}

void MacroAssembler::arm_stack_overflow_check(int frame_size_in_bytes, Register tmp) {
  // Version of AbstractAssembler::generate_stack_overflow_check optimized for ARM
  if (UseStackBanging) {
    const int page_size = os::vm_page_size();

    sub_slow(tmp, SP, JavaThread::stack_shadow_zone_size());
    strb(R0, Address(tmp));
#ifdef AARCH64
    for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= page_size) {
      sub(tmp, tmp, page_size);
      strb(R0, Address(tmp));
    }
#else
    for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= 0xff0) {
      strb(R0, Address(tmp, -0xff0, pre_indexed));
    }
#endif // AARCH64
  }
}

void MacroAssembler::arm_stack_overflow_check(Register Rsize, Register tmp) {
  if (UseStackBanging) {
    Label loop;

    mov(tmp, SP);
    add_slow(Rsize, Rsize, JavaThread::stack_shadow_zone_size() - os::vm_page_size());
#ifdef AARCH64
    sub(tmp, tmp, Rsize);
    bind(loop);
    subs(Rsize, Rsize, os::vm_page_size());
    strb(ZR, Address(tmp, Rsize));
#else
    bind(loop);
    subs(Rsize, Rsize, 0xff0);
    strb(R0, Address(tmp, -0xff0, pre_indexed));
#endif // AARCH64
    b(loop, hi);
  }
}

void MacroAssembler::stop(const char* msg) {
  // This code pattern is matched in NativeInstruction::is_stop.
  // Update it at modifications.
#ifdef COMPILER1
  if (CommentedAssembly) {
    block_comment("stop");
  }
#endif

  InlinedAddress Ldebug(CAST_FROM_FN_PTR(address, MacroAssembler::debug));
  InlinedString Lmsg(msg);

  // save all registers for further inspection
  save_all_registers();

  ldr_literal(R0, Lmsg); // message
  mov(R1, SP);           // register save area

#ifdef AARCH64
  ldr_literal(Rtemp, Ldebug);
  br(Rtemp);
#else
  ldr_literal(PC, Ldebug); // call MacroAssembler::debug
#endif // AARCH64

#if defined(COMPILER2) && defined(AARCH64)
  int off = offset();
#endif
  bind_literal(Lmsg);
  bind_literal(Ldebug);
#if defined(COMPILER2) && defined(AARCH64)
  if (offset() - off == 2 * wordSize) {
    // no padding, so insert nop for worst-case sizing
    nop();
  }
#endif
}

void MacroAssembler::warn(const char* msg) {
#ifdef COMPILER1
  if (CommentedAssembly) {
    block_comment("warn");
  }
#endif

  InlinedAddress Lwarn(CAST_FROM_FN_PTR(address, warning));
  InlinedString Lmsg(msg);
  Label done;

  int push_size = save_caller_save_registers();

#ifdef AARCH64
  // TODO-AARCH64 - get rid of extra debug parameters
  mov(R1, LR);
  mov(R2, FP);
  add(R3, SP, push_size);
#endif

  ldr_literal(R0, Lmsg);  // message
  ldr_literal(LR, Lwarn); // call warning

  call(LR);

  restore_caller_save_registers();

  b(done);
  bind_literal(Lmsg);
  bind_literal(Lwarn);
  bind(done);
}


int MacroAssembler::save_all_registers() {
  // This code pattern is matched in NativeInstruction::is_save_all_registers.
  // Update it at modifications.
#ifdef AARCH64
  const Register tmp = Rtemp;
  raw_push(R30, ZR);
  for (int i = 28; i >= 0; i -= 2) {
    raw_push(as_Register(i), as_Register(i+1));
  }
  mov_pc_to(tmp);
  str(tmp, Address(SP, 31*wordSize));
  ldr(tmp, Address(SP, tmp->encoding()*wordSize));
  return 32*wordSize;
#else
  push(RegisterSet(R0, R12) | RegisterSet(LR) | RegisterSet(PC));
  return 15*wordSize;
#endif // AARCH64
}

void MacroAssembler::restore_all_registers() {
#ifdef AARCH64
  for (int i = 0; i <= 28; i += 2) {
    raw_pop(as_Register(i), as_Register(i+1));
  }
  raw_pop(R30, ZR);
#else
  pop(RegisterSet(R0, R12) | RegisterSet(LR)); // restore registers
  add(SP, SP, wordSize);                       // discard saved PC
#endif // AARCH64
}

int MacroAssembler::save_caller_save_registers() {
#ifdef AARCH64
  for (int i = 0; i <= 16; i += 2) {
    raw_push(as_Register(i), as_Register(i+1));
  }
  raw_push(R18, LR);
  return 20*wordSize;
#else
#if R9_IS_SCRATCHED
  // Save also R10 to preserve alignment
  push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9, R10));
  return 8*wordSize;
#else
  push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
  return 6*wordSize;
#endif
#endif // AARCH64
}

void MacroAssembler::restore_caller_save_registers() {
#ifdef AARCH64
  raw_pop(R18, LR);
  for (int i = 16; i >= 0; i -= 2) {
    raw_pop(as_Register(i), as_Register(i+1));
  }
#else
#if R9_IS_SCRATCHED
  pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9, R10));
#else
  pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
#endif
#endif // AARCH64
}

void MacroAssembler::debug(const char* msg, const intx* registers) {
  // In order to get locks to work, we need to fake an in_VM state
  JavaThread* thread = JavaThread::current();
  thread->set_thread_state(_thread_in_vm);

  if (ShowMessageBoxOnError) {
    ttyLocker ttyl;
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      BytecodeCounter::print();
    }
    if (os::message_box(msg, "Execution stopped, print registers?")) {
#ifdef AARCH64
      // saved registers: R0-R30, PC
      const int nregs = 32;
#else
      // saved registers: R0-R12, LR, PC
      const int nregs = 15;
      const Register regs[nregs] = {R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, PC};
#endif // AARCH64

      for (int i = 0; i < nregs AARCH64_ONLY(-1); i++) {
        tty->print_cr("%s = " INTPTR_FORMAT, AARCH64_ONLY(as_Register(i)) NOT_AARCH64(regs[i])->name(), registers[i]);
      }

#ifdef AARCH64
      tty->print_cr("pc = " INTPTR_FORMAT, registers[nregs-1]);
#endif // AARCH64

      // derive original SP value from the address of register save area
      tty->print_cr("%s = " INTPTR_FORMAT, SP->name(), p2i(&registers[nregs]));
1623 }
1624 BREAKPOINT;
1625 } else {
1626 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
1627 }
1628 assert(false, "DEBUG MESSAGE: %s", msg);
1629 fatal("%s", msg); // returning from MacroAssembler::debug is not supported
1630 }
1631
unimplemented(const char * what)1632 void MacroAssembler::unimplemented(const char* what) {
1633 const char* buf = NULL;
1634 {
1635 ResourceMark rm;
1636 stringStream ss;
1637 ss.print("unimplemented: %s", what);
1638 buf = code_string(ss.as_string());
1639 }
1640 stop(buf);
1641 }
1642
1643
1644 // Implementation of FixedSizeCodeBlock
1645
FixedSizeCodeBlock::FixedSizeCodeBlock(MacroAssembler* masm, int size_in_instrs, bool enabled) :
_masm(masm), _start(masm->pc()), _size_in_instrs(size_in_instrs), _enabled(enabled) {
}

FixedSizeCodeBlock::~FixedSizeCodeBlock() {
  if (_enabled) {
    address curr_pc = _masm->pc();

    assert(_start < curr_pc, "invalid current pc");
    guarantee(curr_pc <= _start + _size_in_instrs * Assembler::InstructionSize, "code block is too long");

    int nops_count = (_start - curr_pc) / Assembler::InstructionSize + _size_in_instrs;
    for (int i = 0; i < nops_count; i++) {
      _masm->nop();
    }
  }
}

#ifdef AARCH64

// Serializes memory.
// The tmp register is not used on AArch64; the parameter is provided solely for compatibility with 32-bit ARM.
void MacroAssembler::membar(Membar_mask_bits order_constraint, Register tmp) {
  if (!os::is_MP()) return;

  // TODO-AARCH64 investigate dsb vs dmb effects
  if (order_constraint == StoreStore) {
    dmb(DMB_st);
  } else if ((order_constraint & ~(LoadLoad | LoadStore)) == 0) {
    dmb(DMB_ld);
  } else {
    dmb(DMB_all);
  }
}

#else

// Serializes memory. Potentially blows flags and reg.
// tmp is a scratch for the v6 co-processor write op (could be noreg for other architecture versions).
// preserve_flags takes a longer path in the LoadStore case (dmb rather than a control dependency) to preserve status flags. Optional.
// load_tgt is an ordered load target, in the LoadStore case only, to create a dependency between the load operation and the conditional branch. Optional.
void MacroAssembler::membar(Membar_mask_bits order_constraint,
                            Register tmp,
                            bool preserve_flags,
                            Register load_tgt) {
  if (!os::is_MP()) return;

  if (order_constraint == StoreStore) {
    dmb(DMB_st, tmp);
  } else if ((order_constraint & StoreLoad)  ||
             (order_constraint & LoadLoad)   ||
             (order_constraint & StoreStore) ||
             (load_tgt == noreg)             ||
             preserve_flags) {
    dmb(DMB_all, tmp);
  } else {
    // LoadStore: speculative store reordering is prohibited

    // By providing an ordered load target register, we avoid an extra memory load reference
    Label not_taken;
    bind(not_taken);
    cmp(load_tgt, load_tgt);
    b(not_taken, ne);
  }
}

#endif // AARCH64
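
// Illustrative sketch (comments only, not compiled): the 32-bit LoadStore case
// above relies on the ARM rule that a store may not become visible ahead of a
// conditional branch whose condition depends on a prior load. The emitted
// sequence for membar(LoadStore, tmp, false, Rval) is roughly:
//
//   ldr  Rval, [addr]   ; the ordered load, performed by the caller
//   cmp  Rval, Rval     ; always equal, so the branch below is never taken
//   bne  <cmp>          ; condition depends on the loaded value
//   str  ...            ; later stores cannot float above the load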

// If "allow_fallthrough_on_failure" is false, we always branch to "slow_case"
// on failure, so fall-through can only mean success.
// "one_shot" controls whether we loop and retry to mitigate spurious failures.
// This is only needed for C2, which for some reason does not retry,
// while C1/interpreter does.
// TODO: measure if it makes a difference

void MacroAssembler::cas_for_lock_acquire(Register oldval, Register newval,
                                          Register base, Register tmp, Label &slow_case,
                                          bool allow_fallthrough_on_failure, bool one_shot)
{

  bool fallthrough_is_success = false;

  // ARM Litmus Test example does prefetching here.
  // TODO: investigate if it helps performance

  // The last store was to the displaced header, so to prevent
  // reordering we must issue a StoreStore or Release barrier before
  // the CAS store.

#ifdef AARCH64

  Register Rscratch = tmp;
  Register Roop = base;
  Register mark = oldval;
  Register Rbox = newval;
  Label loop;

  assert(oopDesc::mark_offset_in_bytes() == 0, "must be");

  // Instead of StoreStore here, we use store-release-exclusive below

  bind(loop);

  ldaxr(tmp, base);  // acquire
  cmp(tmp, oldval);
  b(slow_case, ne);
  stlxr(tmp, newval, base); // release
  if (one_shot) {
    cmp_w(tmp, 0);
  } else {
    cbnz_w(tmp, loop);
    fallthrough_is_success = true;
  }

  // MemBarAcquireLock would normally go here, but
  // we already do ldaxr+stlxr above, which has
  // Sequential Consistency

#else
  membar(MacroAssembler::StoreStore, noreg);

  if (one_shot) {
    ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
    cmp(tmp, oldval);
    strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
    cmp(tmp, 0, eq);
  } else {
    atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
  }

  // MemBarAcquireLock barrier
  // According to JSR-133 Cookbook, this should be LoadLoad | LoadStore,
  // but that doesn't prevent a load or store from floating up between
  // the load and store in the CAS sequence, so play it safe and
  // do a full fence.
  membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), noreg);
#endif
  if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
    b(slow_case, ne);
  }
}
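
// Illustrative sketch (comments only, not compiled): the one_shot 32-bit path
// above is a single-attempt compare-and-swap built from load/store-exclusive.
// In C-like pseudocode, with the return value standing in for the eq/ne flags:
//
//   bool cas_one_shot(intptr_t* p, intptr_t oldval, intptr_t newval) {
//     intptr_t observed = load_exclusive(p);     // ldrex
//     if (observed != oldval) return false;      // cmp -> ne
//     int failed = store_exclusive(newval, p);   // strex, 0 on success
//     return failed == 0;                        // cmp tmp, 0 -> eq/ne
//   }
//
// A spurious store-exclusive failure makes this report ne even though the
// value matched; the looping (non-one_shot) variants retry instead.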

void MacroAssembler::cas_for_lock_release(Register oldval, Register newval,
                                          Register base, Register tmp, Label &slow_case,
                                          bool allow_fallthrough_on_failure, bool one_shot)
{

  bool fallthrough_is_success = false;

  assert_different_registers(oldval, newval, base, tmp);

#ifdef AARCH64
  Label loop;

  assert(oopDesc::mark_offset_in_bytes() == 0, "must be");

  bind(loop);
  ldxr(tmp, base);
  cmp(tmp, oldval);
  b(slow_case, ne);
  // MemBarReleaseLock barrier
  stlxr(tmp, newval, base);
  if (one_shot) {
    cmp_w(tmp, 0);
  } else {
    cbnz_w(tmp, loop);
    fallthrough_is_success = true;
  }
#else
  // MemBarReleaseLock barrier
  // According to JSR-133 Cookbook, this should be StoreStore | LoadStore,
  // but that doesn't prevent a load or store from floating down between
  // the load and store in the CAS sequence, so play it safe and
  // do a full fence.
  membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), tmp);

  if (one_shot) {
    ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
    cmp(tmp, oldval);
    strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
    cmp(tmp, 0, eq);
  } else {
    atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
  }
#endif
  if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
    b(slow_case, ne);
  }

  // ExitEnter
  // According to JSR-133 Cookbook, this should be StoreLoad, the same
  // barrier that follows a volatile store.
  // TODO: Should be able to remove on armv8 if volatile loads
  // use the load-acquire instruction.
  membar(StoreLoad, noreg);
}

#ifndef PRODUCT

// Preserves flags and all registers.
// On SMP the updated value might not be visible to external observers without a synchronization barrier
void MacroAssembler::cond_atomic_inc32(AsmCondition cond, int* counter_addr) {
  if (counter_addr != NULL) {
    InlinedAddress counter_addr_literal((address)counter_addr);
    Label done, retry;
    if (cond != al) {
      b(done, inverse(cond));
    }

#ifdef AARCH64
    raw_push(R0, R1);
    raw_push(R2, ZR);

    ldr_literal(R0, counter_addr_literal);

    bind(retry);
    ldxr_w(R1, R0);
    add_w(R1, R1, 1);
    stxr_w(R2, R1, R0);
    cbnz_w(R2, retry);

    raw_pop(R2, ZR);
    raw_pop(R0, R1);
#else
    push(RegisterSet(R0, R3) | RegisterSet(Rtemp));
    ldr_literal(R0, counter_addr_literal);

    mrs(CPSR, Rtemp);

    bind(retry);
    ldr_s32(R1, Address(R0));
    add(R2, R1, 1);
    atomic_cas_bool(R1, R2, R0, 0, R3);
    b(retry, ne);

    msr(CPSR_fsxc, Rtemp);

    pop(RegisterSet(R0, R3) | RegisterSet(Rtemp));
#endif // AARCH64

    b(done);
    bind_literal(counter_addr_literal);

    bind(done);
  }
}

#endif // !PRODUCT


// Building block for CAS cases of biased locking: makes CAS and records statistics.
// The slow_case label is used to transfer control if CAS fails. Otherwise leaves condition codes set.
void MacroAssembler::biased_locking_enter_with_cas(Register obj_reg, Register old_mark_reg, Register new_mark_reg,
                                                   Register tmp, Label& slow_case, int* counter_addr) {

  cas_for_lock_acquire(old_mark_reg, new_mark_reg, obj_reg, tmp, slow_case);
#ifdef ASSERT
  breakpoint(ne); // Fallthrough only on success
#endif
#ifndef PRODUCT
  if (counter_addr != NULL) {
    cond_atomic_inc32(al, counter_addr);
  }
#endif // !PRODUCT
}

int MacroAssembler::biased_locking_enter(Register obj_reg, Register swap_reg, Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Register tmp2,
                                         Label& done, Label& slow_case,
                                         BiasedLockingCounters* counters) {
  // obj_reg must be preserved (at least) if biased locking fails
  // tmp_reg is a temporary register
  // swap_reg was used as a temporary but contained a value
  //   that was used afterwards in some call paths. Callers
  //   have been fixed so that swap_reg no longer needs to be
  //   saved.
  // Rtemp is no longer scratched

  assert(UseBiasedLocking, "why call this otherwise?");
  assert_different_registers(obj_reg, swap_reg, tmp_reg, tmp2);
  guarantee(swap_reg != tmp_reg, "invariant");
  assert(tmp_reg != noreg, "must supply tmp_reg");

#ifndef PRODUCT
  if (PrintBiasedLockingStatistics && (counters == NULL)) {
    counters = BiasedLocking::counters();
  }
#endif

  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr(obj_reg, oopDesc::mark_offset_in_bytes());

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;

  // The null check applies to the mark loading, if we need to load it.
  // If the mark has already been loaded in swap_reg then it has already
  // been performed and the offset is irrelevant.
  int null_check_offset = offset();
  if (!swap_reg_contains_mark) {
    ldr(swap_reg, mark_addr);
  }

  // On an MP platform loads could return 'stale' values in some cases.
  // That is acceptable since either the CAS or the slow case path is taken in the worst case.

  andr(tmp_reg, swap_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
  cmp(tmp_reg, markOopDesc::biased_lock_pattern);

  b(cas_label, ne);

  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  load_klass(tmp_reg, obj_reg);
  ldr(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
  orr(tmp_reg, tmp_reg, Rthread);
  eor(tmp_reg, tmp_reg, swap_reg);

#ifdef AARCH64
  ands(tmp_reg, tmp_reg, ~((uintx) markOopDesc::age_mask_in_place));
#else
  bics(tmp_reg, tmp_reg, ((int) markOopDesc::age_mask_in_place));
#endif // AARCH64

#ifndef PRODUCT
  if (counters != NULL) {
    cond_atomic_inc32(eq, counters->biased_lock_entry_count_addr());
  }
#endif // !PRODUCT

  b(done, eq);

  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.
  tst(tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
  b(try_revoke_bias, ne);

  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.
  tst(tmp_reg, (uintx)markOopDesc::epoch_mask_in_place);
  b(try_rebias, ne);

  // tmp_reg has the age, epoch and pattern bits cleared
  // The remaining (owner) bits are (Thread ^ current_owner)

  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go in to the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.

  // Note that we know the owner is not ourself. Hence, success can
  // only happen when the owner bits are 0

#ifdef AARCH64
  // The bit mask biased_lock + age + epoch is not a valid AArch64 logical immediate, as it
  // has a cleared bit in the middle (the cms bit), so it is loaded with a separate instruction.
  mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
  andr(swap_reg, swap_reg, tmp2);
#else
  // until the assembler can be made smarter, we need to make some assumptions about the values
  // so we can optimize this:
  assert((markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place) == 0x1ff, "biased bitmasks changed");

  mov(swap_reg, AsmOperand(swap_reg, lsl, 23));
  mov(swap_reg, AsmOperand(swap_reg, lsr, 23)); // markOop with thread bits cleared (for CAS)
#endif // AARCH64
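
  // Illustrative sketch (comments only, not compiled): on a 32-bit word the
  // two shifts above are equivalent to masking with 0x1ff, i.e. keeping only
  // the low 9 (biased_lock | age | epoch) bits:
  //
  //   uint32_t keep_low_9(uint32_t x) {
  //     return (x << 23) >> 23;   // logical shifts: same as x & 0x1ff
  //   }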

  orr(tmp_reg, swap_reg, Rthread); // new mark

  biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case,
        (counters != NULL) ? counters->anonymously_biased_lock_entry_count_addr() : NULL);

  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.

  b(done);

  bind(try_rebias);

  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.

  // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg)

  eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !)

  // owner bits 'random'. Set them to Rthread.
#ifdef AARCH64
  mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
  andr(tmp_reg, tmp_reg, tmp2);
#else
  mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23));
  mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23));
#endif // AARCH64

  orr(tmp_reg, tmp_reg, Rthread); // new mark

  biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case,
        (counters != NULL) ? counters->rebiased_lock_entry_count_addr() : NULL);

  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.

  b(done);

  bind(try_revoke_bias);

  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.

  // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg)

  eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !)

  // owner bits 'random'. Clear them
#ifdef AARCH64
  mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
  andr(tmp_reg, tmp_reg, tmp2);
#else
  mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23));
  mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23));
#endif // AARCH64

  biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, cas_label,
        (counters != NULL) ? counters->revoked_lock_entry_count_addr() : NULL);

  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.

  bind(cas_label);

  return null_check_offset;
}


void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done) {
  assert(UseBiasedLocking, "why call this otherwise?");

  // Check for biased locking unlock case, which is a no-op
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
  ldr(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));

  andr(tmp_reg, tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
  cmp(tmp_reg, markOopDesc::biased_lock_pattern);
  b(done, eq);
}


void MacroAssembler::resolve_jobject(Register value,
                                     Register tmp1,
                                     Register tmp2) {
  assert_different_registers(value, tmp1, tmp2);
  Label done, not_weak;
  cbz(value, done);             // Use NULL as-is.
  STATIC_ASSERT(JNIHandles::weak_tag_mask == 1u);
  tbz(value, 0, not_weak);      // Test for jweak tag.

  // Resolve jweak.
  access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF,
                 Address(value, -JNIHandles::weak_tag_value), value, tmp1, tmp2, noreg);
  b(done);
  bind(not_weak);
  // Resolve (untagged) jobject.
  access_load_at(T_OBJECT, IN_NATIVE,
                 Address(value, 0), value, tmp1, tmp2, noreg);
  verify_oop(value);
  bind(done);
}
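
// Illustrative sketch (comments only, not compiled): resolve_jobject
// dispatches on the low tag bit of the handle, roughly:
//
//   oop resolve(jobject h) {
//     if (h == NULL) return NULL;
//     if ((uintptr_t)h & 1) {                 // jweak tag set
//       return *(oop*)((uintptr_t)h - 1);     // load via GC phantom-ref barrier
//     }
//     return *(oop*)h;                        // plain, untagged jobject
//   }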


//////////////////////////////////////////////////////////////////////////////////

#ifdef AARCH64

void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) {
  switch (size_in_bytes) {
    case 8: ldr(dst, src); break;
    case 4: is_signed ? ldr_s32(dst, src) : ldr_u32(dst, src); break;
    case 2: is_signed ? ldrsh(dst, src) : ldrh(dst, src); break;
    case 1: is_signed ? ldrsb(dst, src) : ldrb(dst, src); break;
    default: ShouldNotReachHere();
  }
}

void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) {
  switch (size_in_bytes) {
    case 8: str(src, dst); break;
    case 4: str_32(src, dst); break;
    case 2: strh(src, dst); break;
    case 1: strb(src, dst); break;
    default: ShouldNotReachHere();
  }
}

#else

void MacroAssembler::load_sized_value(Register dst, Address src,
                                      size_t size_in_bytes, bool is_signed, AsmCondition cond) {
  switch (size_in_bytes) {
    case 4: ldr(dst, src, cond); break;
    case 2: is_signed ? ldrsh(dst, src, cond) : ldrh(dst, src, cond); break;
    case 1: is_signed ? ldrsb(dst, src, cond) : ldrb(dst, src, cond); break;
    default: ShouldNotReachHere();
  }
}


void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes, AsmCondition cond) {
  switch (size_in_bytes) {
    case 4: str(src, dst, cond); break;
    case 2: strh(src, dst, cond); break;
    case 1: strb(src, dst, cond); break;
    default: ShouldNotReachHere();
  }
}
#endif // AARCH64
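
// Illustrative usage (comments only, not compiled; Robj and field_offset are
// placeholders): loading a Java short field sign-extended, then storing it back:
//
//   masm->load_sized_value(R0, Address(Robj, field_offset), 2, true);   // ldrsh
//   masm->store_sized_value(R0, Address(Robj, field_offset), 2);        // strh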

// Look up the method for a megamorphic invokeinterface call.
// The target method is determined by <Rintf, itable_index>.
// The receiver klass is in Rklass.
// On success, the result will be in method_result, and execution falls through.
// On failure, execution transfers to the given label.
void MacroAssembler::lookup_interface_method(Register Rklass,
                                             Register Rintf,
                                             RegisterOrConstant itable_index,
                                             Register method_result,
                                             Register Rscan,
                                             Register Rtmp,
                                             Label& L_no_such_interface) {

  assert_different_registers(Rklass, Rintf, Rscan, Rtmp);

  const int entry_size = itableOffsetEntry::size() * HeapWordSize;
  assert(itableOffsetEntry::interface_offset_in_bytes() == 0, "not added for convenience");

  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  const int base = in_bytes(Klass::vtable_start_offset());
  const int scale = exact_log2(vtableEntry::size_in_bytes());
  ldr_s32(Rtmp, Address(Rklass, Klass::vtable_length_offset())); // Get length of vtable
  add(Rscan, Rklass, base);
  add(Rscan, Rscan, AsmOperand(Rtmp, lsl, scale));

  // Search through the itable for an interface equal to incoming Rintf
  // itable looks like [interface][offset][interface][offset][interface][offset]

  Label loop;
  bind(loop);
  ldr(Rtmp, Address(Rscan, entry_size, post_indexed));
#ifdef AARCH64
  Label found;
  cmp(Rtmp, Rintf);
  b(found, eq);
  cbnz(Rtmp, loop);
#else
  cmp(Rtmp, Rintf);  // set ZF and CF if interface is found
  cmn(Rtmp, 0, ne);  // check if tmp == 0 and clear CF if it is
  b(loop, ne);
#endif // AARCH64

#ifdef AARCH64
  b(L_no_such_interface);
  bind(found);
#else
  // CF == 0 means we reached the end of the itable without finding the interface
  b(L_no_such_interface, cc);
#endif // !AARCH64

  if (method_result != noreg) {
    // Interface found at previous position of Rscan, now load the method
    ldr_s32(Rtmp, Address(Rscan, itableOffsetEntry::offset_offset_in_bytes() - entry_size));
    if (itable_index.is_register()) {
      add(Rtmp, Rtmp, Rklass); // Add offset to Klass*
      assert(itableMethodEntry::size() * HeapWordSize == wordSize, "adjust the scaling in the code below");
      assert(itableMethodEntry::method_offset_in_bytes() == 0, "adjust the offset in the code below");
      ldr(method_result, Address::indexed_ptr(Rtmp, itable_index.as_register()));
    } else {
      int method_offset = itableMethodEntry::size() * HeapWordSize * itable_index.as_constant() +
                          itableMethodEntry::method_offset_in_bytes();
      add_slow(method_result, Rklass, method_offset);
      ldr(method_result, Address(method_result, Rtmp));
    }
  }
}
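
// Illustrative sketch (comments only, not compiled): the scan above walks a
// klass laid out roughly as
//
//   Klass
//     vtable entries        (vtable_length of them)
//     itableOffsetEntry[]   { Klass* interface; int offset; }   // terminated by a NULL interface
//     itableMethodEntry[]   { Method* method; }                 // one block per interface
//
// and, once Rintf is matched, computes
//   method_result = *(Method**)((address)Rklass + offset + itable_index * wordSize);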

#ifdef COMPILER2
// TODO: 8 bytes at a time? pre-fetch?
// Compare char[] arrays aligned to 4 bytes.
void MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
                                        Register limit, Register result,
                                        Register chr1, Register chr2, Label& Ldone) {
  Label Lvector, Lloop;

  // Note: limit contains number of bytes (2*char_elements) != 0.
  tst(limit, 0x2); // trailing character?
  b(Lvector, eq);

  // compare the trailing char
  sub(limit, limit, sizeof(jchar));
  ldrh(chr1, Address(ary1, limit));
  ldrh(chr2, Address(ary2, limit));
  cmp(chr1, chr2);
  mov(result, 0, ne);     // not equal
  b(Ldone, ne);

  // only one char?
  tst(limit, limit);
  mov(result, 1, eq);
  b(Ldone, eq);

  // word by word compare, don't need alignment check
  bind(Lvector);

  // Shift ary1 and ary2 to the end of the arrays, negate limit
  add(ary1, limit, ary1);
  add(ary2, limit, ary2);
  neg(limit, limit);

  bind(Lloop);
  ldr_u32(chr1, Address(ary1, limit));
  ldr_u32(chr2, Address(ary2, limit));
  cmp_32(chr1, chr2);
  mov(result, 0, ne);     // not equal
  b(Ldone, ne);
  adds(limit, limit, 2*sizeof(jchar));
  b(Lloop, ne);

  // Caller should set it:
  // mov(result_reg, 1); //equal
}
#endif

void MacroAssembler::inc_counter(address counter_addr, Register tmpreg1, Register tmpreg2) {
  mov_slow(tmpreg1, counter_addr);
  ldr_s32(tmpreg2, tmpreg1);
  add_32(tmpreg2, tmpreg2, 1);
  str_32(tmpreg2, tmpreg1);
}

void MacroAssembler::floating_cmp(Register dst) {
#ifdef AARCH64
  NOT_TESTED();
  cset(dst, gt);            // 1 if '>', else 0
  csinv(dst, dst, ZR, ge);  // previous value if '>=', else -1
#else
  vmrs(dst, FPSCR);
  orr(dst, dst, 0x08000000);
  eor(dst, dst, AsmOperand(dst, lsl, 3));
  mov(dst, AsmOperand(dst, asr, 30));
#endif
}
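
// Illustrative sketch (comments only, not compiled): on 32-bit ARM the
// sequence above condenses the N/Z/C/V flags held in FPSCR bits 31..28 into
// -1 (less than, or unordered), 0 (equal) or 1 (greater than):
//
//   int32_t floating_cmp(uint32_t fpscr) {
//     uint32_t x = fpscr | 0x08000000;   // set bit 27
//     x ^= x << 3;                       // fold V into bit 31, bit 27 into bit 30
//     return (int32_t)x >> 30;           // arithmetic shift: -1, 0 or 1
//   }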

void MacroAssembler::restore_default_fp_mode() {
#ifdef AARCH64
  msr(SysReg_FPCR, ZR);
#else
#ifndef __SOFTFP__
  // Round to Near mode, IEEE compatible, masked exceptions
  mov(Rtemp, 0);
  vmsr(FPSCR, Rtemp);
#endif // !__SOFTFP__
#endif // AARCH64
}

#ifndef AARCH64
// 24-bit word range == 26-bit byte range
bool check26(int offset) {
  // this could be simplified, but it mimics encoding and decoding
  // an actual branch instruction
  int off1 = offset << 6 >> 8;
  int encoded = off1 & ((1<<24)-1);
  int decoded = encoded << 8 >> 6;
  return offset == decoded;
}
#endif // !AARCH64
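
// Illustrative sketch (comments only, not compiled): check26 accepts exactly
// the word-aligned byte offsets an ARM B/BL instruction can encode, i.e. the
// range [-2^25, 2^25 - 4] in steps of 4:
//
//   check26(0);            // true
//   check26(4);            // true
//   check26(2);            // false: not a multiple of 4
//   check26(1 << 25);      // false: one past the positive limit
//   check26(-(1 << 25));   // true: most negative reachable offset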

// Perform some slight adjustments so the default 32MB code cache
// is fully reachable.
static inline address first_cache_address() {
  return CodeCache::low_bound() + sizeof(HeapBlock::Header);
}
static inline address last_cache_address() {
  return CodeCache::high_bound() - Assembler::InstructionSize;
}

#ifdef AARCH64
// Can we reach target using ADRP?
bool MacroAssembler::page_reachable_from_cache(address target) {
  intptr_t cl = (intptr_t)first_cache_address() & ~0xfff;
  intptr_t ch = (intptr_t)last_cache_address() & ~0xfff;
  intptr_t addr = (intptr_t)target & ~0xfff;

  intptr_t loffset = addr - cl;
  intptr_t hoffset = addr - ch;
  return is_imm_in_range(loffset >> 12, 21, 0) && is_imm_in_range(hoffset >> 12, 21, 0);
}
#endif

// Can we reach target using unconditional branch or call from anywhere
// in the code cache (because code can be relocated)?
bool MacroAssembler::_reachable_from_cache(address target) {
#ifdef __thumb__
  if ((1 & (intptr_t)target) != 0) {
    // Return false to avoid 'b' if we would need to switch to THUMB mode.
    return false;
  }
#endif

  address cl = first_cache_address();
  address ch = last_cache_address();

  if (ForceUnreachable) {
    // Only addresses from CodeCache can be treated as reachable.
    if (target < CodeCache::low_bound() || CodeCache::high_bound() < target) {
      return false;
    }
  }

  intptr_t loffset = (intptr_t)target - (intptr_t)cl;
  intptr_t hoffset = (intptr_t)target - (intptr_t)ch;

#ifdef AARCH64
  return is_offset_in_range(loffset, 26) && is_offset_in_range(hoffset, 26);
#else
  return check26(loffset - 8) && check26(hoffset - 8);
#endif
}

bool MacroAssembler::reachable_from_cache(address target) {
  assert(CodeCache::contains(pc()), "not supported");
  return _reachable_from_cache(target);
}

// Can we reach the entire code cache from anywhere else in the code cache?
bool MacroAssembler::_cache_fully_reachable() {
  address cl = first_cache_address();
  address ch = last_cache_address();
  return _reachable_from_cache(cl) && _reachable_from_cache(ch);
}

bool MacroAssembler::cache_fully_reachable() {
  assert(CodeCache::contains(pc()), "not supported");
  return _cache_fully_reachable();
}

void MacroAssembler::jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) {
  assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
  if (reachable_from_cache(target)) {
    relocate(rtype);
    b(target NOT_AARCH64_ARG(cond));
    return;
  }

  // Note: relocate is not needed for the code below,
  // encoding targets in absolute format.
  if (ignore_non_patchable_relocations()) {
    rtype = relocInfo::none;
  }

#ifdef AARCH64
  assert(scratch != noreg, "should be specified");
  InlinedAddress address_literal(target, rtype);
  ldr_literal(scratch, address_literal);
  br(scratch);
  int off = offset();
  bind_literal(address_literal);
#ifdef COMPILER2
  if (offset() - off == wordSize) {
    // no padding, so insert nop for worst-case sizing
    nop();
  }
#endif
#else
  if (VM_Version::supports_movw() && (scratch != noreg) && (rtype == relocInfo::none)) {
    // Note: this version cannot be (atomically) patched
    mov_slow(scratch, (intptr_t)target, cond);
    bx(scratch, cond);
  } else {
    Label skip;
    InlinedAddress address_literal(target);
    if (cond != al) {
      b(skip, inverse(cond));
    }
    relocate(rtype);
    ldr_literal(PC, address_literal);
    bind_literal(address_literal);
    bind(skip);
  }
#endif // AARCH64
}

// Similar to jump except that:
// - near calls are valid only if any destination in the cache is near
// - no movt/movw (not atomically patchable)
void MacroAssembler::patchable_jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) {
  assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
  if (cache_fully_reachable()) {
    // Note: this assumes that all possible targets (the initial one
    // and the addresses patched to) are all in the code cache.
    assert(CodeCache::contains(target), "target might be too far");
    relocate(rtype);
    b(target NOT_AARCH64_ARG(cond));
    return;
  }

  // Discard the relocation information if not needed for CacheCompiledCode
  // since the next encodings are all in absolute format.
  if (ignore_non_patchable_relocations()) {
    rtype = relocInfo::none;
  }

#ifdef AARCH64
  assert(scratch != noreg, "should be specified");
  InlinedAddress address_literal(target);
  relocate(rtype);
  ldr_literal(scratch, address_literal);
  br(scratch);
  int off = offset();
  bind_literal(address_literal);
#ifdef COMPILER2
  if (offset() - off == wordSize) {
    // no padding, so insert nop for worst-case sizing
    nop();
  }
#endif
#else
  {
    Label skip;
    InlinedAddress address_literal(target);
    if (cond != al) {
      b(skip, inverse(cond));
    }
    relocate(rtype);
    ldr_literal(PC, address_literal);
    bind_literal(address_literal);
    bind(skip);
  }
#endif // AARCH64
}

void MacroAssembler::call(address target, RelocationHolder rspec NOT_AARCH64_ARG(AsmCondition cond)) {
  Register scratch = LR;
  assert(rspec.type() == relocInfo::runtime_call_type || rspec.type() == relocInfo::none, "not supported");
  if (reachable_from_cache(target)) {
    relocate(rspec);
    bl(target NOT_AARCH64_ARG(cond));
    return;
  }

  // Note: relocate is not needed for the code below,
  // encoding targets in absolute format.
  if (ignore_non_patchable_relocations()) {
    // This assumes the information was needed only for relocating the code.
    rspec = RelocationHolder::none;
  }

#ifndef AARCH64
  if (VM_Version::supports_movw() && (rspec.type() == relocInfo::none)) {
    // Note: this version cannot be (atomically) patched
    mov_slow(scratch, (intptr_t)target, cond);
    blx(scratch, cond);
    return;
  }
#endif

  {
    Label ret_addr;
#ifndef AARCH64
    if (cond != al) {
      b(ret_addr, inverse(cond));
    }
#endif


#ifdef AARCH64
    // TODO-AARCH64: make more optimal implementation
    // [ Keep in sync with MacroAssembler::call_size ]
    assert(rspec.type() == relocInfo::none, "call reloc not implemented");
    mov_slow(scratch, target);
    blr(scratch);
#else
    InlinedAddress address_literal(target);
    relocate(rspec);
    adr(LR, ret_addr);
    ldr_literal(PC, address_literal);

    bind_literal(address_literal);
    bind(ret_addr);
#endif
  }
}

#if defined(AARCH64) && defined(COMPILER2)
int MacroAssembler::call_size(address target, bool far, bool patchable) {
  // FIXME: mov_slow is variable-length
  if (!far) return 1; // bl
  if (patchable) return 2; // ldr; blr
  return instr_count_for_mov_slow((intptr_t)target) + 1;
}
#endif

int MacroAssembler::patchable_call(address target, RelocationHolder const& rspec, bool c2) {
  assert(rspec.type() == relocInfo::static_call_type ||
         rspec.type() == relocInfo::none ||
         rspec.type() == relocInfo::opt_virtual_call_type, "not supported");

  // Always generate the relocation information, needed for patching
  relocate(rspec); // used by NativeCall::is_call_before()
  if (cache_fully_reachable()) {
    // Note: this assumes that all possible targets (the initial one
    // and the addresses patched to) are all in the code cache.
    assert(CodeCache::contains(target), "target might be too far");
    bl(target);
  } else {
#if defined(AARCH64) && defined(COMPILER2)
    if (c2) {
      // return address needs to match call_size().
      // no need to trash Rtemp
      int off = offset();
      Label skip_literal;
      InlinedAddress address_literal(target);
      ldr_literal(LR, address_literal);
      blr(LR);
      int ret_addr_offset = offset();
      assert(offset() - off == call_size(target, true, true) * InstructionSize, "need to fix call_size()");
      b(skip_literal);
      int off2 = offset();
      bind_literal(address_literal);
      if (offset() - off2 == wordSize) {
        // no padding, so insert nop for worst-case sizing
        nop();
      }
      bind(skip_literal);
      return ret_addr_offset;
    }
#endif
    Label ret_addr;
    InlinedAddress address_literal(target);
#ifdef AARCH64
    ldr_literal(Rtemp, address_literal);
    adr(LR, ret_addr);
    br(Rtemp);
#else
    adr(LR, ret_addr);
    ldr_literal(PC, address_literal);
#endif
    bind_literal(address_literal);
    bind(ret_addr);
  }
  return offset();
}

// ((OopHandle)result).resolve();
void MacroAssembler::resolve_oop_handle(Register result) {
  // OopHandle::resolve is an indirection.
  ldr(result, Address(result, 0));
}

void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) {
  const int mirror_offset = in_bytes(Klass::java_mirror_offset());
  ldr(tmp, Address(method, Method::const_offset()));
  ldr(tmp, Address(tmp, ConstMethod::constants_offset()));
  ldr(tmp, Address(tmp, ConstantPool::pool_holder_offset_in_bytes()));
  ldr(mirror, Address(tmp, mirror_offset));
  resolve_oop_handle(mirror);
}
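
// Illustrative sketch (comments only, not compiled): load_mirror follows the
// chain Method -> ConstMethod -> ConstantPool -> pool-holder Klass -> mirror
// OopHandle, roughly the C++ equivalent of
//
//   mirror = method->constants()->pool_holder()->java_mirror();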


///////////////////////////////////////////////////////////////////////////////

// Compressed pointers

#ifdef AARCH64

void MacroAssembler::load_klass(Register dst_klass, Register src_oop) {
  if (UseCompressedClassPointers) {
    ldr_w(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()));
    decode_klass_not_null(dst_klass);
  } else {
    ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()));
  }
}

#else

void MacroAssembler::load_klass(Register dst_klass, Register src_oop, AsmCondition cond) {
  ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()), cond);
}

#endif // AARCH64

// Blows src_klass.
void MacroAssembler::store_klass(Register src_klass, Register dst_oop) {
#ifdef AARCH64
  if (UseCompressedClassPointers) {
    assert(src_klass != dst_oop, "not enough registers");
    encode_klass_not_null(src_klass);
    str_w(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
    return;
  }
#endif // AARCH64
  str(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
}

#ifdef AARCH64

void MacroAssembler::store_klass_gap(Register dst) {
  if (UseCompressedClassPointers) {
    str_w(ZR, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
  }
}

#endif // AARCH64


void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, Register tmp2, Register tmp3, DecoratorSet decorators) {
  access_load_at(T_OBJECT, IN_HEAP | decorators, src, dst, tmp1, tmp2, tmp3);
}

// Blows src and flags.
void MacroAssembler::store_heap_oop(Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, DecoratorSet decorators) {
  access_store_at(T_OBJECT, IN_HEAP | decorators, obj, new_val, tmp1, tmp2, tmp3, false);
}

void MacroAssembler::store_heap_oop_null(Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, DecoratorSet decorators) {
  access_store_at(T_OBJECT, IN_HEAP, obj, new_val, tmp1, tmp2, tmp3, true);
}

void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators,
                                    Address src, Register dst, Register tmp1, Register tmp2, Register tmp3) {
  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
  decorators = AccessInternal::decorator_fixup(decorators);
  bool as_raw = (decorators & AS_RAW) != 0;
  if (as_raw) {
    bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3);
  } else {
    bs->load_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3);
  }
}

void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators,
                                     Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, bool is_null) {
  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
  decorators = AccessInternal::decorator_fixup(decorators);
  bool as_raw = (decorators & AS_RAW) != 0;
  if (as_raw) {
    bs->BarrierSetAssembler::store_at(this, decorators, type, obj, new_val, tmp1, tmp2, tmp3, is_null);
  } else {
    bs->store_at(this, decorators, type, obj, new_val, tmp1, tmp2, tmp3, is_null);
  }
}


#ifdef AARCH64

// Algorithm must match oop.inline.hpp encode_heap_oop.
void MacroAssembler::encode_heap_oop(Register dst, Register src) {
  // This code pattern is matched in NativeInstruction::skip_encode_heap_oop.
  // Update it at modifications.
  assert (UseCompressedOops, "must be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
#endif
  verify_oop(src);
  if (Universe::narrow_oop_base() == NULL) {
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      _lsr(dst, src, Universe::narrow_oop_shift());
    } else if (dst != src) {
      mov(dst, src);
    }
  } else {
    tst(src, src);
    csel(dst, Rheap_base, src, eq);
    sub(dst, dst, Rheap_base);
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      _lsr(dst, dst, Universe::narrow_oop_shift());
    }
  }
}

// Same algorithm as oop.inline.hpp decode_heap_oop.
void MacroAssembler::decode_heap_oop(Register dst, Register src) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
#endif
  assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  if (Universe::narrow_oop_base() != NULL) {
    tst(src, src);
    add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift()));
    csel(dst, dst, ZR, ne);
  } else {
    _lsl(dst, src, Universe::narrow_oop_shift());
  }
  verify_oop(dst);
}
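
// Illustrative sketch (comments only, not compiled): with a non-NULL narrow
// oop base, the encode/decode pair above computes, preserving NULL:
//
//   narrowOop encode(oop p) {
//     return p == NULL ? 0 : (narrowOop)(((uintptr_t)p - heap_base) >> shift);
//   }
//   oop decode(narrowOop n) {
//     return n == 0 ? NULL : (oop)(heap_base + ((uintptr_t)n << shift));
//   }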

#ifdef COMPILER2
// Algorithm must match oop.inline.hpp encode_heap_oop.
// Must preserve condition codes, or C2 encodeHeapOop_not_null rule
// must be changed.
void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
  assert (UseCompressedOops, "must be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
#endif
  verify_oop(src);
  if (Universe::narrow_oop_base() == NULL) {
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      _lsr(dst, src, Universe::narrow_oop_shift());
    } else if (dst != src) {
      mov(dst, src);
    }
  } else {
    sub(dst, src, Rheap_base);
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      _lsr(dst, dst, Universe::narrow_oop_shift());
    }
  }
}

// Same algorithm as oop.inline.hpp decode_heap_oop.
// Must preserve condition codes, or C2 decodeHeapOop_not_null rule
// must be changed.
void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
#endif
  assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  if (Universe::narrow_oop_base() != NULL) {
    add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift()));
  } else {
    _lsl(dst, src, Universe::narrow_oop_shift());
  }
  verify_oop(dst);
}

void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
  assert(UseCompressedClassPointers, "should only be used for compressed header");
  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int klass_index = oop_recorder()->find_index(k);
  RelocationHolder rspec = metadata_Relocation::spec(klass_index);

  // Relocation with special format (see relocInfo_arm.hpp).
  relocate(rspec);
  narrowKlass encoded_k = Klass::encode_klass(k);
  movz(dst, encoded_k & 0xffff, 0);
  movk(dst, (encoded_k >> 16) & 0xffff, 16);
}

void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
  assert(UseCompressedOops, "should only be used for compressed header");
  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int oop_index = oop_recorder()->find_index(obj);
  RelocationHolder rspec = oop_Relocation::spec(oop_index);

  relocate(rspec);
  movz(dst, 0xffff, 0);
  movk(dst, 0xffff, 16);
}

#endif // COMPILER2
// Must preserve condition codes, or C2 encodeKlass_not_null rule
// must be changed.
void MacroAssembler::encode_klass_not_null(Register r) {
  if (Universe::narrow_klass_base() != NULL) {
    // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base.
    assert(r != Rheap_base, "Encoding a klass in Rheap_base");
    mov_slow(Rheap_base, Universe::narrow_klass_base());
    sub(r, r, Rheap_base);
  }
  if (Universe::narrow_klass_shift() != 0) {
    assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
    _lsr(r, r, Universe::narrow_klass_shift());
  }
  if (Universe::narrow_klass_base() != NULL) {
    reinit_heapbase();
  }
}

// Must preserve condition codes, or C2 encodeKlass_not_null rule
// must be changed.
void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
  if (dst == src) {
    encode_klass_not_null(src);
    return;
  }
  if (Universe::narrow_klass_base() != NULL) {
    mov_slow(dst, (int64_t)Universe::narrow_klass_base());
    sub(dst, src, dst);
    if (Universe::narrow_klass_shift() != 0) {
      assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
      _lsr(dst, dst, Universe::narrow_klass_shift());
    }
  } else {
    if (Universe::narrow_klass_shift() != 0) {
      assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
      _lsr(dst, src, Universe::narrow_klass_shift());
    } else {
      mov(dst, src);
    }
  }
}

// Function instr_count_for_decode_klass_not_null() counts the instructions
// generated by decode_klass_not_null(register r) and reinit_heapbase(),
// when (Universe::heap() != NULL). Hence, if the instructions they
// generate change, then this method needs to be updated.
int MacroAssembler::instr_count_for_decode_klass_not_null() {
  assert(UseCompressedClassPointers, "only for compressed klass ptrs");
  assert(Universe::heap() != NULL, "java heap should be initialized");
  if (Universe::narrow_klass_base() != NULL) {
    return instr_count_for_mov_slow(Universe::narrow_klass_base()) + // mov_slow
           1 +                                                       // add
           instr_count_for_mov_slow(Universe::narrow_ptrs_base());   // reinit_heapbase() = mov_slow
  } else {
    if (Universe::narrow_klass_shift() != 0) {
      return 1;
    }
  }
  return 0;
}

// Must preserve condition codes, or C2 decodeKlass_not_null rule
// must be changed.
void MacroAssembler::decode_klass_not_null(Register r) {
  int off = offset();
  assert(UseCompressedClassPointers, "should only be used for compressed headers");
  assert(Universe::heap() != NULL, "java heap should be initialized");
  assert(r != Rheap_base, "Decoding a klass in Rheap_base");
  // Cannot assert, instr_count_for_decode_klass_not_null() counts instructions.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_klass_base() != NULL) {
    // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base.
    mov_slow(Rheap_base, Universe::narrow_klass_base());
    add(r, Rheap_base, AsmOperand(r, lsl, Universe::narrow_klass_shift()));
    reinit_heapbase();
  } else {
    if (Universe::narrow_klass_shift() != 0) {
      assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
      _lsl(r, r, Universe::narrow_klass_shift());
    }
  }
  assert((offset() - off) == (instr_count_for_decode_klass_not_null() * InstructionSize), "need to fix instr_count_for_decode_klass_not_null");
}

// Must preserve condition codes, or C2 decodeKlass_not_null rule
// must be changed.
void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
  if (src == dst) {
    decode_klass_not_null(src);
    return;
  }

  assert(UseCompressedClassPointers, "should only be used for compressed headers");
  assert(Universe::heap() != NULL, "java heap should be initialized");
  assert(src != Rheap_base, "Decoding a klass in Rheap_base");
  assert(dst != Rheap_base, "Decoding a klass into Rheap_base");
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_klass_base() != NULL) {
    mov_slow(dst, Universe::narrow_klass_base());
    add(dst, dst, AsmOperand(src, lsl, Universe::narrow_klass_shift()));
  } else {
    _lsl(dst, src, Universe::narrow_klass_shift());
  }
}


void MacroAssembler::reinit_heapbase() {
  if (UseCompressedOops || UseCompressedClassPointers) {
    if (Universe::heap() != NULL) {
      mov_slow(Rheap_base, Universe::narrow_ptrs_base());
    } else {
      ldr_global_ptr(Rheap_base, (address)Universe::narrow_ptrs_base_addr());
    }
  }
}

#ifdef ASSERT
void MacroAssembler::verify_heapbase(const char* msg) {
  // This code pattern is matched in NativeInstruction::skip_verify_heapbase.
  // Update it at modifications.
  assert (UseCompressedOops, "should be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  if (CheckCompressedOops) {
    Label ok;
    str(Rthread, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset())));
    raw_push(Rtemp, ZR);
    mrs(Rtemp, Assembler::SysReg_NZCV);
    str(Rtemp, Address(SP, 1 * wordSize));
    mov_slow(Rtemp, Universe::narrow_ptrs_base());
    cmp(Rheap_base, Rtemp);
    b(ok, eq);
    stop(msg);
    bind(ok);
    ldr(Rtemp, Address(SP, 1 * wordSize));
    msr(Assembler::SysReg_NZCV, Rtemp);
    raw_pop(Rtemp, ZR);
    str(ZR, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset())));
  }
}
#endif // ASSERT

#endif // AARCH64

#ifdef COMPILER2
void MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2, Register scratch3)
{
  assert(VM_Version::supports_ldrex(), "unsupported, yet?");

  Register Rmark = Rscratch2;

  assert(Roop != Rscratch, "");
  assert(Roop != Rmark, "");
  assert(Rbox != Rscratch, "");
  assert(Rbox != Rmark, "");

  Label fast_lock, done;

  if (UseBiasedLocking && !UseOptoBiasInlining) {
    assert(scratch3 != noreg, "need extra temporary for -XX:-UseOptoBiasInlining");
    biased_locking_enter(Roop, Rmark, Rscratch, false, scratch3, done, done);
    // Fall through if lock not biased otherwise branch to done
  }

  // Invariant: Rmark loaded below does not contain biased lock pattern

  ldr(Rmark, Address(Roop, oopDesc::mark_offset_in_bytes()));
  tst(Rmark, markOopDesc::unlocked_value);
  b(fast_lock, ne);

  // Check for recursive lock
  // See comments in InterpreterMacroAssembler::lock_object for
  // explanations on the fast recursive locking check.
#ifdef AARCH64
  intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size());
  Assembler::LogicalImmediate imm(mask, false);
  mov(Rscratch, SP);
  sub(Rscratch, Rmark, Rscratch);
  ands(Rscratch, Rscratch, imm);
  // set to zero if recursive lock, set to non zero otherwise (see discussion in JDK-8153107)
  str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
  b(done);

#else
  // -1- test low 2 bits
  movs(Rscratch, AsmOperand(Rmark, lsl, 30));
  // -2- test (hdr - SP) if the low two bits are 0
  sub(Rscratch, Rmark, SP, eq);
  movs(Rscratch, AsmOperand(Rscratch, lsr, exact_log2(os::vm_page_size())), eq);
  // If still 'eq' then recursive locking OK
  // set to zero if recursive lock, set to non zero otherwise (see discussion in JDK-8153107)
  str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
  b(done);
#endif

  bind(fast_lock);
  str(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));

  bool allow_fallthrough_on_failure = true;
  bool one_shot = true;
  cas_for_lock_acquire(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);

  bind(done);

  // At this point flags are set as follows:
  //  EQ -> Success
  //  NE -> Failure, branch to slow path
}
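
// Illustrative sketch (comments only, not compiled): the recursive-lock test
// above checks that the displaced mark is a word-aligned address no more than
// a page above SP, i.e. roughly
//
//   bool recursive = ((mark - sp) & (~(uintptr_t)(page_size - 1) | 3)) == 0;
//
// A zero stored into the BasicLock then flags the recursive case, while a
// non-zero displaced header flags a real lock (see JDK-8153107).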

void MacroAssembler::fast_unlock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3))
{
  assert(VM_Version::supports_ldrex(), "unsupported, yet?");

  Register Rmark = Rscratch2;

  assert(Roop != Rscratch, "");
  assert(Roop != Rmark, "");
  assert(Rbox != Rscratch, "");
  assert(Rbox != Rmark, "");

  Label done;

  if (UseBiasedLocking && !UseOptoBiasInlining) {
    biased_locking_exit(Roop, Rscratch, done);
  }

  ldr(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
  // If hdr is NULL, we've got recursive locking and there's nothing more to do
  cmp(Rmark, 0);
  b(done, eq);

  // Restore the object header
  bool allow_fallthrough_on_failure = true;
  bool one_shot = true;
  cas_for_lock_release(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);

  bind(done);

}
#endif // COMPILER2
