1 /*
2  * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.
8  *
9  * This code is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12  * version 2 for more details (a copy is included in the LICENSE file that
13  * accompanied this code).
14  *
15  * You should have received a copy of the GNU General Public License version
16  * 2 along with this work; if not, write to the Free Software Foundation,
17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18  *
19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20  * or visit www.oracle.com if you need additional information or have any
21  * questions.
22  *
23  */
24 
25 #include "precompiled.hpp"
26 #include "asm/assembler.hpp"
27 #include "asm/assembler.inline.hpp"
28 #include "asm/macroAssembler.hpp"
29 #include "ci/ciEnv.hpp"
30 #include "code/nativeInst.hpp"
31 #include "compiler/disassembler.hpp"
32 #include "gc/shared/barrierSet.hpp"
33 #include "gc/shared/cardTable.hpp"
34 #include "gc/shared/barrierSetAssembler.hpp"
35 #include "gc/shared/cardTableBarrierSet.hpp"
36 #include "gc/shared/collectedHeap.inline.hpp"
37 #include "interpreter/interpreter.hpp"
38 #include "memory/resourceArea.hpp"
39 #include "oops/accessDecorators.hpp"
40 #include "oops/klass.inline.hpp"
41 #include "prims/methodHandles.hpp"
42 #include "runtime/biasedLocking.hpp"
43 #include "runtime/interfaceSupport.inline.hpp"
44 #include "runtime/objectMonitor.hpp"
45 #include "runtime/os.hpp"
46 #include "runtime/sharedRuntime.hpp"
47 #include "runtime/stubRoutines.hpp"
48 #include "utilities/macros.hpp"
49 
50 // Implementation of AddressLiteral
51 
52 void AddressLiteral::set_rspec(relocInfo::relocType rtype) {
53   switch (rtype) {
54   case relocInfo::oop_type:
55     // Oops are a special case. Normally they would be their own section
56     // but in cases like icBuffer they are literals in the code stream that
57     // we don't have a section for. We use none so that we get a literal address
58     // which is always patchable.
59     break;
60   case relocInfo::external_word_type:
61     _rspec = external_word_Relocation::spec(_target);
62     break;
63   case relocInfo::internal_word_type:
64     _rspec = internal_word_Relocation::spec(_target);
65     break;
66   case relocInfo::opt_virtual_call_type:
67     _rspec = opt_virtual_call_Relocation::spec();
68     break;
69   case relocInfo::static_call_type:
70     _rspec = static_call_Relocation::spec();
71     break;
72   case relocInfo::runtime_call_type:
73     _rspec = runtime_call_Relocation::spec();
74     break;
75   case relocInfo::poll_type:
76   case relocInfo::poll_return_type:
77     _rspec = Relocation::spec_simple(rtype);
78     break;
79   case relocInfo::none:
80     break;
81   default:
82     ShouldNotReachHere();
83     break;
84   }
85 }
86 
87 // Initially added to the Assembler interface as a pure virtual:
88 //   RegisterConstant delayed_value(..)
89 // for:
90 //   6812678 macro assembler needs delayed binding of a few constants (for 6655638)
91 // this was subsequently modified to its present name and return type
92 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
93                                                       Register tmp,
94                                                       int offset) {
95   ShouldNotReachHere();
96   return RegisterOrConstant(-1);
97 }
98 
99 
100 #ifdef AARCH64
101 // Note: ARM32 version is OS dependent
102 void MacroAssembler::breakpoint(AsmCondition cond) {
103   if (cond == al) {
104     brk();
105   } else {
106     Label L;
107     b(L, inverse(cond));
108     brk();
109     bind(L);
110   }
111 }
112 #endif // AARCH64
113 
114 
115 // virtual method calling
116 void MacroAssembler::lookup_virtual_method(Register recv_klass,
117                                            Register vtable_index,
118                                            Register method_result) {
119   const int base_offset = in_bytes(Klass::vtable_start_offset()) + vtableEntry::method_offset_in_bytes();
120   assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
121   add(recv_klass, recv_klass, AsmOperand(vtable_index, lsl, LogBytesPerWord));
122   ldr(method_result, Address(recv_klass, base_offset));
123 }
124 
125 
126 // Simplified, combined version, good for typical uses.
127 // Falls through on failure.
128 void MacroAssembler::check_klass_subtype(Register sub_klass,
129                                          Register super_klass,
130                                          Register temp_reg,
131                                          Register temp_reg2,
132                                          Register temp_reg3,
133                                          Label& L_success) {
134   Label L_failure;
135   check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, temp_reg2, &L_success, &L_failure, NULL);
136   check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, temp_reg2, temp_reg3, &L_success, NULL);
137   bind(L_failure);
138 };
139 
140 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
141                                                    Register super_klass,
142                                                    Register temp_reg,
143                                                    Register temp_reg2,
144                                                    Label* L_success,
145                                                    Label* L_failure,
146                                                    Label* L_slow_path) {
147 
148   assert_different_registers(sub_klass, super_klass, temp_reg, temp_reg2, noreg);
149   const Register super_check_offset = temp_reg2;
150 
151   Label L_fallthrough;
152   int label_nulls = 0;
153   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
154   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
155   if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
156   assert(label_nulls <= 1, "at most one NULL in the batch");
157 
158   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
159   int sco_offset = in_bytes(Klass::super_check_offset_offset());
160   Address super_check_offset_addr(super_klass, sco_offset);
161 
162   // If the pointers are equal, we are done (e.g., String[] elements).
163   // This self-check enables sharing of secondary supertype arrays among
164   // non-primary types such as array-of-interface.  Otherwise, each such
165   // type would need its own customized SSA.
166   // We move this check to the front of the fast path because many
167   // type checks are in fact trivially successful in this manner,
168   // so we get a nicely predicted branch right at the start of the check.
169   cmp(sub_klass, super_klass);
170   b(*L_success, eq);
171 
172   // Check the supertype display:
173   ldr_u32(super_check_offset, super_check_offset_addr);
174 
175   Address super_check_addr(sub_klass, super_check_offset);
176   ldr(temp_reg, super_check_addr);
177   cmp(super_klass, temp_reg); // load displayed supertype
178 
179   // This check has worked decisively for primary supers.
180   // Secondary supers are sought in the super_cache ('super_cache_addr').
181   // (Secondary supers are interfaces and very deeply nested subtypes.)
182   // This works in the same check above because of a tricky aliasing
183   // between the super_cache and the primary super display elements.
184   // (The 'super_check_addr' can address either, as the case requires.)
185   // Note that the cache is updated below if it does not help us find
186   // what we need immediately.
187   // So if it was a primary super, we can just fail immediately.
188   // Otherwise, it's the slow path for us (no success at this point).
189 
190   b(*L_success, eq);
191   cmp_32(super_check_offset, sc_offset);
192   if (L_failure == &L_fallthrough) {
193     b(*L_slow_path, eq);
194   } else {
195     b(*L_failure, ne);
196     if (L_slow_path != &L_fallthrough) {
197       b(*L_slow_path);
198     }
199   }
200 
201   bind(L_fallthrough);
202 }
203 
204 
205 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
206                                                    Register super_klass,
207                                                    Register temp_reg,
208                                                    Register temp2_reg,
209                                                    Register temp3_reg,
210                                                    Label* L_success,
211                                                    Label* L_failure,
212                                                    bool set_cond_codes) {
213 #ifdef AARCH64
214   NOT_IMPLEMENTED();
215 #else
216   // Note: if used by code that expects a register to be 0 on success,
217   // this register must be temp_reg and set_cond_codes must be true
218 
219   Register saved_reg = noreg;
220 
221   // get additional tmp registers
222   if (temp3_reg == noreg) {
223     saved_reg = temp3_reg = LR;
224     push(saved_reg);
225   }
226 
227   assert(temp2_reg != noreg, "need all the temporary registers");
228   assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, temp3_reg);
229 
230   Register cmp_temp = temp_reg;
231   Register scan_temp = temp3_reg;
232   Register count_temp = temp2_reg;
233 
234   Label L_fallthrough;
235   int label_nulls = 0;
236   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
237   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
238   assert(label_nulls <= 1, "at most one NULL in the batch");
239 
240   // a couple of useful fields in sub_klass:
241   int ss_offset = in_bytes(Klass::secondary_supers_offset());
242   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
243   Address secondary_supers_addr(sub_klass, ss_offset);
244   Address super_cache_addr(     sub_klass, sc_offset);
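  // The slow path does a linear scan of sub_klass's secondary supers array
  // looking for super_klass; on a hit, the result is cached in the
  // secondary_super_cache so that the fast path can succeed next time.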
245 
246 #ifndef PRODUCT
247   inc_counter((address)&SharedRuntime::_partial_subtype_ctr, scan_temp, count_temp);
248 #endif
249 
250   // We will consult the secondary-super array.
251   ldr(scan_temp, Address(sub_klass, ss_offset));
252 
253   assert(! UseCompressedOops, "search_key must be the compressed super_klass");
254   // else search_key would have to be the compressed super_klass
255   Register search_key = super_klass;
256 
257   // Load the array length.
258   ldr(count_temp, Address(scan_temp, Array<Klass*>::length_offset_in_bytes()));
259   add(scan_temp, scan_temp, Array<Klass*>::base_offset_in_bytes());
260 
261   add(count_temp, count_temp, 1);
262 
263   Label L_loop, L_setnz_and_fail, L_fail;
264 
265   // Top of search loop
266   bind(L_loop);
267   // Notes:
268   //  scan_temp starts at the array elements
269   //  count_temp is 1+size
270   subs(count_temp, count_temp, 1);
271   if ((L_failure != &L_fallthrough) && (! set_cond_codes) && (saved_reg == noreg)) {
272     // direct jump to L_failure if failed and no cleanup needed
273     b(*L_failure, eq); // not found and no cleanup needed
274   } else {
275     b(L_fail, eq); // not found in the array
276   }
277 
278   // Load next super to check
279   // In the array of super classes, elements are pointer sized.
280   int element_size = wordSize;
281   ldr(cmp_temp, Address(scan_temp, element_size, post_indexed));
282 
283   // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
284   subs(cmp_temp, cmp_temp, search_key);
285 
286   // A miss means we are NOT a subtype and need to keep looping
287   b(L_loop, ne);
288 
289   // Falling out the bottom means we found a hit; we ARE a subtype
290 
291   // Note: temp_reg/cmp_temp is already 0 and flag Z is set
292 
293   // Success.  Cache the super we found and proceed in triumph.
294   str(super_klass, Address(sub_klass, sc_offset));
295 
296   if (saved_reg != noreg) {
297     // Return success
298     pop(saved_reg);
299   }
300 
301   b(*L_success);
302 
303   bind(L_fail);
304   // Note1: check "b(*L_failure, eq)" above if adding extra instructions here
305   if (set_cond_codes) {
306     movs(temp_reg, sub_klass); // clears Z and sets temp_reg to non-0 if needed
307   }
308   if (saved_reg != noreg) {
309     pop(saved_reg);
310   }
311   if (L_failure != &L_fallthrough) {
312     b(*L_failure);
313   }
314 
315   bind(L_fallthrough);
316 #endif
317 }
318 
319 // Returns address of receiver parameter, using tmp as base register. tmp and params_count can be the same.
320 Address MacroAssembler::receiver_argument_address(Register params_base, Register params_count, Register tmp) {
321   assert_different_registers(params_base, params_count);
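  // tmp is set one element past the parameter area (params_base +
  // params_count * stackElementSize); the receiver occupies the last slot of
  // that area, one stackElementSize below tmp.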
322   add(tmp, params_base, AsmOperand(params_count, lsl, Interpreter::logStackElementSize));
323   return Address(tmp, -Interpreter::stackElementSize);
324 }
325 
326 
327 void MacroAssembler::align(int modulus) {
328   while (offset() % modulus != 0) {
329     nop();
330   }
331 }
332 
333 int MacroAssembler::set_last_Java_frame(Register last_java_sp,
334                                         Register last_java_fp,
335                                         bool save_last_java_pc,
336                                         Register tmp) {
337   int pc_offset;
338   if (last_java_fp != noreg) {
339     // optional
340     str(last_java_fp, Address(Rthread, JavaThread::last_Java_fp_offset()));
341     _fp_saved = true;
342   } else {
343     _fp_saved = false;
344   }
345   if (AARCH64_ONLY(true) NOT_AARCH64(save_last_java_pc)) { // optional on 32-bit ARM
346 #ifdef AARCH64
347     pc_offset = mov_pc_to(tmp);
348     str(tmp, Address(Rthread, JavaThread::last_Java_pc_offset()));
349 #else
350     str(PC, Address(Rthread, JavaThread::last_Java_pc_offset()));
351     pc_offset = offset() + VM_Version::stored_pc_adjustment();
352 #endif
353     _pc_saved = true;
354   } else {
355     _pc_saved = false;
356     pc_offset = -1;
357   }
358   // According to the comment in javaFrameAnchor, SP must be saved last, so that other
359   // entries are valid when SP is set.
360 
361   // However, this is probably not a strong constraint since, for instance, PC is
362   // sometimes read from the stack at SP... but is pushed later (by the call). Hence,
363   // we now write the fields in the expected order but we have not added a StoreStore
364   // barrier.
365 
366   // XXX: if the ordering is really important, PC should always be saved (without forgetting
367   // to update oop_map offsets) and a StoreStore barrier might be needed.
368 
369   if (last_java_sp == noreg) {
370     last_java_sp = SP; // always saved
371   }
372 #ifdef AARCH64
373   if (last_java_sp == SP) {
374     mov(tmp, SP);
375     str(tmp, Address(Rthread, JavaThread::last_Java_sp_offset()));
376   } else {
377     str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
378   }
379 #else
380   str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
381 #endif
382 
383   return pc_offset; // for oopmaps
384 }
385 
386 void MacroAssembler::reset_last_Java_frame(Register tmp) {
387   const Register Rzero = zero_register(tmp);
388   str(Rzero, Address(Rthread, JavaThread::last_Java_sp_offset()));
389   if (_fp_saved) {
390     str(Rzero, Address(Rthread, JavaThread::last_Java_fp_offset()));
391   }
392   if (_pc_saved) {
393     str(Rzero, Address(Rthread, JavaThread::last_Java_pc_offset()));
394   }
395 }
396 
397 
398 // Implementation of call_VM versions
399 
400 void MacroAssembler::call_VM_leaf_helper(address entry_point, int number_of_arguments) {
401   assert(number_of_arguments >= 0, "cannot have negative number of arguments");
402   assert(number_of_arguments <= 4, "cannot have more than 4 arguments");
403 
404 #ifndef AARCH64
405   // Safer to save R9 here since callers may have been written
406   // assuming R9 survives. This is suboptimal but is not worth
407   // optimizing for the few platforms where R9 is scratched.
408   push(RegisterSet(R4) | R9ifScratched);
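  // Keep the original SP in R4 and round SP down to the required stack
  // alignment for the C call; SP is restored from R4 once the call returns.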
409   mov(R4, SP);
410   bic(SP, SP, StackAlignmentInBytes - 1);
411 #endif // AARCH64
412   call(entry_point, relocInfo::runtime_call_type);
413 #ifndef AARCH64
414   mov(SP, R4);
415   pop(RegisterSet(R4) | R9ifScratched);
416 #endif // AARCH64
417 }
418 
419 
420 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
421   assert(number_of_arguments >= 0, "cannot have negative number of arguments");
422   assert(number_of_arguments <= 3, "cannot have more than 3 arguments");
423 
424   const Register tmp = Rtemp;
425   assert_different_registers(oop_result, tmp);
426 
427   set_last_Java_frame(SP, FP, true, tmp);
428 
429 #ifdef ASSERT
430   AARCH64_ONLY(if (UseCompressedOops || UseCompressedClassPointers) { verify_heapbase("call_VM_helper: heap base corrupted?"); });
431 #endif // ASSERT
432 
433 #ifndef AARCH64
434 #if R9_IS_SCRATCHED
435   // Safer to save R9 here since callers may have been written
436   // assuming R9 survives. This is suboptimal but is not worth
437   // optimizing for the few platforms where R9 is scratched.
438 
439   // Note: cannot save R9 above the saved SP (some calls expect, for
440   // instance, the Java stack top at the saved SP)
441   // => once saved (with set_last_Java_frame), decrease SP before rounding to
442   // ensure the slot at SP will be free for R9.
443   sub(SP, SP, 4);
444   bic(SP, SP, StackAlignmentInBytes - 1);
445   str(R9, Address(SP, 0));
446 #else
447   bic(SP, SP, StackAlignmentInBytes - 1);
448 #endif // R9_IS_SCRATCHED
449 #endif
450 
451   mov(R0, Rthread);
452   call(entry_point, relocInfo::runtime_call_type);
453 
454 #ifndef AARCH64
455 #if R9_IS_SCRATCHED
456   ldr(R9, Address(SP, 0));
457 #endif
458   ldr(SP, Address(Rthread, JavaThread::last_Java_sp_offset()));
459 #endif
460 
461   reset_last_Java_frame(tmp);
462 
463   // C++ interp handles this in the interpreter
464   check_and_handle_popframe();
465   check_and_handle_earlyret();
466 
467   if (check_exceptions) {
468     // check for pending exceptions
469     ldr(tmp, Address(Rthread, Thread::pending_exception_offset()));
470 #ifdef AARCH64
471     Label L;
472     cbz(tmp, L);
473     mov_pc_to(Rexception_pc);
474     b(StubRoutines::forward_exception_entry());
475     bind(L);
476 #else
477     cmp(tmp, 0);
478     mov(Rexception_pc, PC, ne);
479     b(StubRoutines::forward_exception_entry(), ne);
480 #endif // AARCH64
481   }
482 
483   // get oop result if there is one and reset the value in the thread
484   if (oop_result->is_valid()) {
485     get_vm_result(oop_result, tmp);
486   }
487 }
488 
489 void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) {
490   call_VM_helper(oop_result, entry_point, 0, check_exceptions);
491 }
492 
493 
494 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) {
495   assert (arg_1 == R1, "fixed register for arg_1");
496   call_VM_helper(oop_result, entry_point, 1, check_exceptions);
497 }
498 
499 
500 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
501   assert (arg_1 == R1, "fixed register for arg_1");
502   assert (arg_2 == R2, "fixed register for arg_2");
503   call_VM_helper(oop_result, entry_point, 2, check_exceptions);
504 }
505 
506 
507 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
508   assert (arg_1 == R1, "fixed register for arg_1");
509   assert (arg_2 == R2, "fixed register for arg_2");
510   assert (arg_3 == R3, "fixed register for arg_3");
511   call_VM_helper(oop_result, entry_point, 3, check_exceptions);
512 }
513 
514 
515 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments, bool check_exceptions) {
516   // Not used on ARM
517   Unimplemented();
518 }
519 
520 
521 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) {
522   // Not used on ARM
523   Unimplemented();
524 }
525 
526 
527 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
528 // Not used on ARM
529   Unimplemented();
530 }
531 
532 
533 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
534   // Not used on ARM
535   Unimplemented();
536 }
537 
538 // Raw call, without saving/restoring registers, exception handling, etc.
539 // Mainly used from various stubs.
540 void MacroAssembler::call_VM(address entry_point, bool save_R9_if_scratched) {
541   const Register tmp = Rtemp; // Rtemp free since scratched by call
542   set_last_Java_frame(SP, FP, true, tmp);
543 #if R9_IS_SCRATCHED
544   if (save_R9_if_scratched) {
545     // Note: Saving also R10 for alignment.
546     push(RegisterSet(R9, R10));
547   }
548 #endif
549   mov(R0, Rthread);
550   call(entry_point, relocInfo::runtime_call_type);
551 #if R9_IS_SCRATCHED
552   if (save_R9_if_scratched) {
553     pop(RegisterSet(R9, R10));
554   }
555 #endif
556   reset_last_Java_frame(tmp);
557 }
558 
559 void MacroAssembler::call_VM_leaf(address entry_point) {
560   call_VM_leaf_helper(entry_point, 0);
561 }
562 
563 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) {
564   assert (arg_1 == R0, "fixed register for arg_1");
565   call_VM_leaf_helper(entry_point, 1);
566 }
567 
568 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) {
569   assert (arg_1 == R0, "fixed register for arg_1");
570   assert (arg_2 == R1, "fixed register for arg_2");
571   call_VM_leaf_helper(entry_point, 2);
572 }
573 
574 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
575   assert (arg_1 == R0, "fixed register for arg_1");
576   assert (arg_2 == R1, "fixed register for arg_2");
577   assert (arg_3 == R2, "fixed register for arg_3");
578   call_VM_leaf_helper(entry_point, 3);
579 }
580 
581 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4) {
582   assert (arg_1 == R0, "fixed register for arg_1");
583   assert (arg_2 == R1, "fixed register for arg_2");
584   assert (arg_3 == R2, "fixed register for arg_3");
585   assert (arg_4 == R3, "fixed register for arg_4");
586   call_VM_leaf_helper(entry_point, 4);
587 }
588 
589 void MacroAssembler::get_vm_result(Register oop_result, Register tmp) {
590   assert_different_registers(oop_result, tmp);
591   ldr(oop_result, Address(Rthread, JavaThread::vm_result_offset()));
592   str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_offset()));
593   verify_oop(oop_result);
594 }
595 
596 void MacroAssembler::get_vm_result_2(Register metadata_result, Register tmp) {
597   assert_different_registers(metadata_result, tmp);
598   ldr(metadata_result, Address(Rthread, JavaThread::vm_result_2_offset()));
599   str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_2_offset()));
600 }
601 
602 void MacroAssembler::add_rc(Register dst, Register arg1, RegisterOrConstant arg2) {
603   if (arg2.is_register()) {
604     add(dst, arg1, arg2.as_register());
605   } else {
606     add(dst, arg1, arg2.as_constant());
607   }
608 }
609 
610 void MacroAssembler::add_slow(Register rd, Register rn, int c) {
611 #ifdef AARCH64
612   if (c == 0) {
613     if (rd != rn) {
614       mov(rd, rn);
615     }
616     return;
617   }
618   if (c < 0) {
619     sub_slow(rd, rn, -c);
620     return;
621   }
622   if (c > right_n_bits(24)) {
623     guarantee(rd != rn, "no large add_slow with only one register");
624     mov_slow(rd, c);
625     add(rd, rn, rd);
626   } else {
627     int lo = c & right_n_bits(12);
628     int hi = (c >> 12) & right_n_bits(12);
629     if (lo != 0) {
630       add(rd, rn, lo, lsl0);
631     }
632     if (hi != 0) {
633       add(rd, (lo == 0) ? rn : rd, hi, lsl12);
634     }
635   }
636 #else
637   // This function is used in compiler for handling large frame offsets
638   if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
639     return sub(rd, rn, (-c));
640   }
641   int low = c & 0x3fc;
642   if (low != 0) {
643     add(rd, rn, low);
644     rn = rd;
645   }
646   if (c & ~0x3fc) {
647     assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported add_slow offset %d", c);
648     add(rd, rn, c & ~0x3fc);
649   } else if (rd != rn) {
650     assert(c == 0, "");
651     mov(rd, rn); // need to generate at least one move!
652   }
653 #endif // AARCH64
654 }
655 
656 void MacroAssembler::sub_slow(Register rd, Register rn, int c) {
657 #ifdef AARCH64
658   if (c <= 0) {
659     add_slow(rd, rn, -c);
660     return;
661   }
662   if (c > right_n_bits(24)) {
663     guarantee(rd != rn, "no large sub_slow with only one register");
664     mov_slow(rd, c);
665     sub(rd, rn, rd);
666   } else {
667     int lo = c & right_n_bits(12);
668     int hi = (c >> 12) & right_n_bits(12);
669     if (lo != 0) {
670       sub(rd, rn, lo, lsl0);
671     }
672     if (hi != 0) {
673       sub(rd, (lo == 0) ? rn : rd, hi, lsl12);
674     }
675   }
676 #else
677   // This function is used in compiler for handling large frame offsets
678   if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
679     return add(rd, rn, (-c));
680   }
681   int low = c & 0x3fc;
682   if (low != 0) {
683     sub(rd, rn, low);
684     rn = rd;
685   }
686   if (c & ~0x3fc) {
687     assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported sub_slow offset %d", c);
688     sub(rd, rn, c & ~0x3fc);
689   } else if (rd != rn) {
690     assert(c == 0, "");
691     mov(rd, rn); // need to generate at least one move!
692   }
693 #endif // AARCH64
694 }
695 
696 void MacroAssembler::mov_slow(Register rd, address addr) {
697   // do *not* call the non relocated mov_related_address
698   mov_slow(rd, (intptr_t)addr);
699 }
700 
701 void MacroAssembler::mov_slow(Register rd, const char *str) {
702   mov_slow(rd, (intptr_t)str);
703 }
704 
705 #ifdef AARCH64
706 
707 // Common code for mov_slow and instr_count_for_mov_slow.
708 // Returns number of instructions of mov_slow pattern,
709 // generating it if non-null MacroAssembler is given.
710 int MacroAssembler::mov_slow_helper(Register rd, intptr_t c, MacroAssembler* masm) {
711   // This code pattern is matched in NativeInstruction::is_mov_slow.
712   // Update it at modifications.
713 
714   const intx mask = right_n_bits(16);
715   // 1 movz instruction
716   for (int base_shift = 0; base_shift < 64; base_shift += 16) {
717     if ((c & ~(mask << base_shift)) == 0) {
718       if (masm != NULL) {
719         masm->movz(rd, ((uintx)c) >> base_shift, base_shift);
720       }
721       return 1;
722     }
723   }
724   // 1 movn instruction
725   for (int base_shift = 0; base_shift < 64; base_shift += 16) {
726     if (((~c) & ~(mask << base_shift)) == 0) {
727       if (masm != NULL) {
728         masm->movn(rd, ((uintx)(~c)) >> base_shift, base_shift);
729       }
730       return 1;
731     }
732   }
733   // 1 orr instruction
734   {
735     LogicalImmediate imm(c, false);
736     if (imm.is_encoded()) {
737       if (masm != NULL) {
738         masm->orr(rd, ZR, imm);
739       }
740       return 1;
741     }
742   }
743   // 1 movz/movn + up to 3 movk instructions
744   int zeroes = 0;
745   int ones = 0;
746   for (int base_shift = 0; base_shift < 64; base_shift += 16) {
747     int part = (c >> base_shift) & mask;
748     if (part == 0) {
749       ++zeroes;
750     } else if (part == mask) {
751       ++ones;
752     }
753   }
754   int def_bits = 0;
755   if (ones > zeroes) {
756     def_bits = mask;
757   }
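  // Pick movn as the base instruction when most 16-bit chunks are all-ones
  // (those chunks then come for free), movz when most are all-zeros; the
  // remaining chunks are filled in with movk.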
758   int inst_count = 0;
759   for (int base_shift = 0; base_shift < 64; base_shift += 16) {
760     int part = (c >> base_shift) & mask;
761     if (part != def_bits) {
762       if (masm != NULL) {
763         if (inst_count > 0) {
764           masm->movk(rd, part, base_shift);
765         } else {
766           if (def_bits == 0) {
767             masm->movz(rd, part, base_shift);
768           } else {
769             masm->movn(rd, ~part & mask, base_shift);
770           }
771         }
772       }
773       inst_count++;
774     }
775   }
776   assert((1 <= inst_count) && (inst_count <= 4), "incorrect number of instructions");
777   return inst_count;
778 }
779 
780 void MacroAssembler::mov_slow(Register rd, intptr_t c) {
781 #ifdef ASSERT
782   int off = offset();
783 #endif
784   (void) mov_slow_helper(rd, c, this);
785   assert(offset() - off == instr_count_for_mov_slow(c) * InstructionSize, "size mismatch");
786 }
787 
788 // Counts instructions generated by mov_slow(rd, c).
789 int MacroAssembler::instr_count_for_mov_slow(intptr_t c) {
790   return mov_slow_helper(noreg, c, NULL);
791 }
792 
793 int MacroAssembler::instr_count_for_mov_slow(address c) {
794   return mov_slow_helper(noreg, (intptr_t)c, NULL);
795 }
796 
797 #else
798 
799 void MacroAssembler::mov_slow(Register rd, intptr_t c, AsmCondition cond) {
800   if (AsmOperand::is_rotated_imm(c)) {
801     mov(rd, c, cond);
802   } else if (AsmOperand::is_rotated_imm(~c)) {
803     mvn(rd, ~c, cond);
804   } else if (VM_Version::supports_movw()) {
805     movw(rd, c & 0xffff, cond);
806     if ((unsigned int)c >> 16) {
807       movt(rd, (unsigned int)c >> 16, cond);
808     }
809   } else {
810     // Find first non-zero bit
811     int shift = 0;
812     while ((c & (3 << shift)) == 0) {
813       shift += 2;
814     }
815     // Put the least significant part of the constant
816     int mask = 0xff << shift;
817     mov(rd, c & mask, cond);
818     // Add up to 3 other parts of the constant;
819     // each of them can be represented as rotated_imm
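    // Illustrative example (assuming no movw/movt support): for c = 0x12345678
    // the first non-zero bit pair is at shift 2, so mask = 0x3fc and the
    // emitted sequence is roughly
    //   mov rd, #0x278; orr rd, rd, #0x5400; orr rd, rd, #0x2340000; orr rd, rd, #0x10000000
    // where each piece is a valid rotated immediate and the pieces OR together
    // to the original constant.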
820     if (c & (mask << 8)) {
821       orr(rd, rd, c & (mask << 8), cond);
822     }
823     if (c & (mask << 16)) {
824       orr(rd, rd, c & (mask << 16), cond);
825     }
826     if (c & (mask << 24)) {
827       orr(rd, rd, c & (mask << 24), cond);
828     }
829   }
830 }
831 
832 #endif // AARCH64
833 
834 void MacroAssembler::mov_oop(Register rd, jobject o, int oop_index,
835 #ifdef AARCH64
836                              bool patchable
837 #else
838                              AsmCondition cond
839 #endif
840                              ) {
841 
842   if (o == NULL) {
843 #ifdef AARCH64
844     if (patchable) {
845       nop();
846     }
847     mov(rd, ZR);
848 #else
849     mov(rd, 0, cond);
850 #endif
851     return;
852   }
853 
854   if (oop_index == 0) {
855     oop_index = oop_recorder()->allocate_oop_index(o);
856   }
857   relocate(oop_Relocation::spec(oop_index));
858 
859 #ifdef AARCH64
860   if (patchable) {
861     nop();
862   }
863   ldr(rd, pc());
864 #else
865   if (VM_Version::supports_movw()) {
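    // The zero immediates below are only placeholders: the oop relocation
    // recorded above lets the actual oop constant be patched into the
    // movw/movt pair later (cf. NativeMovConstReg::set_data).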
866     movw(rd, 0, cond);
867     movt(rd, 0, cond);
868   } else {
869     ldr(rd, Address(PC), cond);
870     // Extra nop to handle case of large offset of oop placeholder (see NativeMovConstReg::set_data).
871     nop();
872   }
873 #endif
874 }
875 
876 void MacroAssembler::mov_metadata(Register rd, Metadata* o, int metadata_index AARCH64_ONLY_ARG(bool patchable)) {
877   if (o == NULL) {
878 #ifdef AARCH64
879     if (patchable) {
880       nop();
881     }
882 #endif
883     mov(rd, 0);
884     return;
885   }
886 
887   if (metadata_index == 0) {
888     metadata_index = oop_recorder()->allocate_metadata_index(o);
889   }
890   relocate(metadata_Relocation::spec(metadata_index));
891 
892 #ifdef AARCH64
893   if (patchable) {
894     nop();
895   }
896 #ifdef COMPILER2
897   if (!patchable && VM_Version::prefer_moves_over_load_literal()) {
898     mov_slow(rd, (address)o);
899     return;
900   }
901 #endif
902   ldr(rd, pc());
903 #else
904   if (VM_Version::supports_movw()) {
905     movw(rd, ((int)o) & 0xffff);
906     movt(rd, (unsigned int)o >> 16);
907   } else {
908     ldr(rd, Address(PC));
909     // Extra nop to handle case of large offset of metadata placeholder (see NativeMovConstReg::set_data).
910     nop();
911   }
912 #endif // AARCH64
913 }
914 
915 void MacroAssembler::mov_float(FloatRegister fd, jfloat c NOT_AARCH64_ARG(AsmCondition cond)) {
916   Label skip_constant;
917   union {
918     jfloat f;
919     jint i;
920   } accessor;
921   accessor.f = c;
922 
923 #ifdef AARCH64
924   // TODO-AARCH64 - try to optimize loading of float constants with fmov and/or mov_slow
925   Label L;
926   ldr_s(fd, target(L));
927   b(skip_constant);
928   bind(L);
929   emit_int32(accessor.i);
930   bind(skip_constant);
931 #else
932   flds(fd, Address(PC), cond);
933   b(skip_constant);
934   emit_int32(accessor.i);
935   bind(skip_constant);
936 #endif // AARCH64
937 }
938 
939 void MacroAssembler::mov_double(FloatRegister fd, jdouble c NOT_AARCH64_ARG(AsmCondition cond)) {
940   Label skip_constant;
941   union {
942     jdouble d;
943     jint i[2];
944   } accessor;
945   accessor.d = c;
946 
947 #ifdef AARCH64
948   // TODO-AARCH64 - try to optimize loading of double constants with fmov
949   Label L;
950   ldr_d(fd, target(L));
951   b(skip_constant);
952   align(wordSize);
953   bind(L);
954   emit_int32(accessor.i[0]);
955   emit_int32(accessor.i[1]);
956   bind(skip_constant);
957 #else
958   fldd(fd, Address(PC), cond);
959   b(skip_constant);
960   emit_int32(accessor.i[0]);
961   emit_int32(accessor.i[1]);
962   bind(skip_constant);
963 #endif // AARCH64
964 }
965 
966 void MacroAssembler::ldr_global_s32(Register reg, address address_of_global) {
967   intptr_t addr = (intptr_t) address_of_global;
968 #ifdef AARCH64
969   assert((addr & 0x3) == 0, "address should be aligned");
970 
971   // FIXME: TODO
972   if (false && page_reachable_from_cache(address_of_global)) {
973     assert(false,"TODO: relocate");
974     //relocate();
975     adrp(reg, address_of_global);
976     ldrsw(reg, Address(reg, addr & 0xfff));
977   } else {
978     mov_slow(reg, addr & ~0x3fff);
979     ldrsw(reg, Address(reg, addr & 0x3fff));
980   }
981 #else
982   mov_slow(reg, addr & ~0xfff);
983   ldr(reg, Address(reg, addr & 0xfff));
984 #endif
985 }
986 
987 void MacroAssembler::ldr_global_ptr(Register reg, address address_of_global) {
988 #ifdef AARCH64
989   intptr_t addr = (intptr_t) address_of_global;
990   assert ((addr & 0x7) == 0, "address should be aligned");
991   mov_slow(reg, addr & ~0x7fff);
992   ldr(reg, Address(reg, addr & 0x7fff));
993 #else
994   ldr_global_s32(reg, address_of_global);
995 #endif
996 }
997 
998 void MacroAssembler::ldrb_global(Register reg, address address_of_global) {
999   intptr_t addr = (intptr_t) address_of_global;
1000   mov_slow(reg, addr & ~0xfff);
1001   ldrb(reg, Address(reg, addr & 0xfff));
1002 }
1003 
1004 void MacroAssembler::zero_extend(Register rd, Register rn, int bits) {
1005 #ifdef AARCH64
1006   switch (bits) {
1007     case  8: uxtb(rd, rn); break;
1008     case 16: uxth(rd, rn); break;
1009     case 32: mov_w(rd, rn); break;
1010     default: ShouldNotReachHere();
1011   }
1012 #else
1013   if (bits <= 8) {
1014     andr(rd, rn, (1 << bits) - 1);
1015   } else if (bits >= 24) {
1016     bic(rd, rn, -1 << bits);
1017   } else {
1018     mov(rd, AsmOperand(rn, lsl, 32 - bits));
1019     mov(rd, AsmOperand(rd, lsr, 32 - bits));
1020   }
1021 #endif
1022 }
1023 
1024 void MacroAssembler::sign_extend(Register rd, Register rn, int bits) {
1025 #ifdef AARCH64
1026   switch (bits) {
1027     case  8: sxtb(rd, rn); break;
1028     case 16: sxth(rd, rn); break;
1029     case 32: sxtw(rd, rn); break;
1030     default: ShouldNotReachHere();
1031   }
1032 #else
1033   mov(rd, AsmOperand(rn, lsl, 32 - bits));
1034   mov(rd, AsmOperand(rd, asr, 32 - bits));
1035 #endif
1036 }
1037 
1038 #ifndef AARCH64
1039 
1040 void MacroAssembler::long_move(Register rd_lo, Register rd_hi,
1041                                Register rn_lo, Register rn_hi,
1042                                AsmCondition cond) {
1043   if (rd_lo != rn_hi) {
1044     if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
1045     if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
1046   } else if (rd_hi != rn_lo) {
1047     if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
1048     if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
1049   } else {
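    // Full overlap (rd_lo == rn_hi and rd_hi == rn_lo): swap the two halves in
    // place with the classic three-XOR trick, avoiding a scratch register.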
1050     eor(rd_lo, rd_hi, rd_lo, cond);
1051     eor(rd_hi, rd_lo, rd_hi, cond);
1052     eor(rd_lo, rd_hi, rd_lo, cond);
1053   }
1054 }
1055 
1056 void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
1057                                 Register rn_lo, Register rn_hi,
1058                                 AsmShift shift, Register count) {
1059   Register tmp;
1060   if (rd_lo != rn_lo && rd_lo != rn_hi && rd_lo != count) {
1061     tmp = rd_lo;
1062   } else {
1063     tmp = rd_hi;
1064   }
1065   assert_different_registers(tmp, count, rn_lo, rn_hi);
1066 
1067   subs(tmp, count, 32);
1068   if (shift == lsl) {
1069     assert_different_registers(rd_hi, rn_lo);
1070     assert_different_registers(count, rd_hi);
1071     mov(rd_hi, AsmOperand(rn_lo, shift, tmp), pl);
1072     rsb(tmp, count, 32, mi);
1073     if (rd_hi == rn_hi) {
1074       mov(rd_hi, AsmOperand(rn_hi, lsl, count), mi);
1075       orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
1076     } else {
1077       mov(rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
1078       orr(rd_hi, rd_hi, AsmOperand(rn_hi, lsl, count), mi);
1079     }
1080     mov(rd_lo, AsmOperand(rn_lo, shift, count));
1081   } else {
1082     assert_different_registers(rd_lo, rn_hi);
1083     assert_different_registers(rd_lo, count);
1084     mov(rd_lo, AsmOperand(rn_hi, shift, tmp), pl);
1085     rsb(tmp, count, 32, mi);
1086     if (rd_lo == rn_lo) {
1087       mov(rd_lo, AsmOperand(rn_lo, lsr, count), mi);
1088       orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
1089     } else {
1090       mov(rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
1091       orr(rd_lo, rd_lo, AsmOperand(rn_lo, lsr, count), mi);
1092     }
1093     mov(rd_hi, AsmOperand(rn_hi, shift, count));
1094   }
1095 }
1096 
1097 void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
1098                                 Register rn_lo, Register rn_hi,
1099                                 AsmShift shift, int count) {
1100   assert(count != 0 && (count & ~63) == 0, "must be");
1101 
1102   if (shift == lsl) {
1103     assert_different_registers(rd_hi, rn_lo);
1104     if (count >= 32) {
1105       mov(rd_hi, AsmOperand(rn_lo, lsl, count - 32));
1106       mov(rd_lo, 0);
1107     } else {
1108       mov(rd_hi, AsmOperand(rn_hi, lsl, count));
1109       orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, 32 - count));
1110       mov(rd_lo, AsmOperand(rn_lo, lsl, count));
1111     }
1112   } else {
1113     assert_different_registers(rd_lo, rn_hi);
1114     if (count >= 32) {
1115       if (count == 32) {
1116         mov(rd_lo, rn_hi);
1117       } else {
1118         mov(rd_lo, AsmOperand(rn_hi, shift, count - 32));
1119       }
1120       if (shift == asr) {
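        // An immediate ASR amount of 0 encodes ASR #32 in the ARM instruction
        // set, so rd_hi is left holding only copies of rn_hi's sign bit.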
1121         mov(rd_hi, AsmOperand(rn_hi, asr, 0));
1122       } else {
1123         mov(rd_hi, 0);
1124       }
1125     } else {
1126       mov(rd_lo, AsmOperand(rn_lo, lsr, count));
1127       orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, 32 - count));
1128       mov(rd_hi, AsmOperand(rn_hi, shift, count));
1129     }
1130   }
1131 }
1132 #endif // !AARCH64
1133 
1134 void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
1135   // This code pattern is matched in NativeInstruction::skip_verify_oop.
1136   // Update it at modifications.
1137   if (!VerifyOops) return;
1138 
1139   char buffer[64];
1140 #ifdef COMPILER1
1141   if (CommentedAssembly) {
1142     snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset());
1143     block_comment(buffer);
1144   }
1145 #endif
1146   const char* msg_buffer = NULL;
1147   {
1148     ResourceMark rm;
1149     stringStream ss;
1150     ss.print("%s at offset %d (%s:%d)", s, offset(), file, line);
1151     msg_buffer = code_string(ss.as_string());
1152   }
1153 
1154   save_all_registers();
1155 
1156   if (reg != R2) {
1157       mov(R2, reg);                              // oop to verify
1158   }
1159   mov(R1, SP);                                   // register save area
1160 
1161   Label done;
1162   InlinedString Lmsg(msg_buffer);
1163   ldr_literal(R0, Lmsg);                         // message
1164 
1165   // call indirectly to solve generation ordering problem
1166   ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
1167   call(Rtemp);
1168 
1169   restore_all_registers();
1170 
1171   b(done);
1172 #ifdef COMPILER2
1173   int off = offset();
1174 #endif
1175   bind_literal(Lmsg);
1176 #ifdef COMPILER2
1177   if (offset() - off == 1 * wordSize) {
1178     // no padding, so insert nop for worst-case sizing
1179     nop();
1180   }
1181 #endif
1182   bind(done);
1183 }
1184 
1185 void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
1186   if (!VerifyOops) return;
1187 
1188   const char* msg_buffer = NULL;
1189   {
1190     ResourceMark rm;
1191     stringStream ss;
1192     if ((addr.base() == SP) && (addr.index()==noreg)) {
1193       ss.print("verify_oop_addr SP[%d]: %s", (int)addr.disp(), s);
1194     } else {
1195       ss.print("verify_oop_addr: %s", s);
1196     }
1197     ss.print(" (%s:%d)", file, line);
1198     msg_buffer = code_string(ss.as_string());
1199   }
1200 
1201   int push_size = save_all_registers();
1202 
1203   if (addr.base() == SP) {
1204     // computes an addr that takes into account the push
1205     if (addr.index() != noreg) {
1206       Register new_base = addr.index() == R2 ? R1 : R2; // avoid corrupting the index
1207       add(new_base, SP, push_size);
1208       addr = addr.rebase(new_base);
1209     } else {
1210       addr = addr.plus_disp(push_size);
1211     }
1212   }
1213 
1214   ldr(R2, addr);                                 // oop to verify
1215   mov(R1, SP);                                   // register save area
1216 
1217   Label done;
1218   InlinedString Lmsg(msg_buffer);
1219   ldr_literal(R0, Lmsg);                         // message
1220 
1221   // call indirectly to solve generation ordering problem
1222   ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
1223   call(Rtemp);
1224 
1225   restore_all_registers();
1226 
1227   b(done);
1228   bind_literal(Lmsg);
1229   bind(done);
1230 }
1231 
1232 void MacroAssembler::c2bool(Register x) {
1233   tst(x, 0xff);   // Only look at the lowest byte
1234 #ifdef AARCH64
1235   cset(x, ne);
1236 #else
1237   mov(x, 1, ne);
1238 #endif
1239 }
1240 
1241 void MacroAssembler::null_check(Register reg, Register tmp, int offset) {
1242   if (needs_explicit_null_check(offset)) {
1243 #ifdef AARCH64
1244     ldr(ZR, Address(reg));
1245 #else
1246     assert_different_registers(reg, tmp);
1247     if (tmp == noreg) {
1248       tmp = Rtemp;
1249       assert((! Thread::current()->is_Compiler_thread()) ||
1250              (! (ciEnv::current()->task() == NULL)) ||
1251              (! (ciEnv::current()->comp_level() == CompLevel_full_optimization)),
1252              "Rtemp not available in C2"); // explicit tmp register required
1253       // XXX: could we mark the code buffer as not compatible with C2 ?
1254     }
1255     ldr(tmp, Address(reg));
1256 #endif
1257   }
1258 }
1259 
1260 // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
1261 void MacroAssembler::eden_allocate(Register obj, Register obj_end, Register tmp1, Register tmp2,
1262                                  RegisterOrConstant size_expression, Label& slow_case) {
1263   if (!Universe::heap()->supports_inline_contig_alloc()) {
1264     b(slow_case);
1265     return;
1266   }
1267 
1268   CollectedHeap* ch = Universe::heap();
1269 
1270   const Register top_addr = tmp1;
1271   const Register heap_end = tmp2;
1272 
1273   if (size_expression.is_register()) {
1274     assert_different_registers(obj, obj_end, top_addr, heap_end, size_expression.as_register());
1275   } else {
1276     assert_different_registers(obj, obj_end, top_addr, heap_end);
1277   }
1278 
1279   bool load_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw() ); // TODO-AARCH64 check performance
1280   if (load_const) {
1281     mov_address(top_addr, (address)Universe::heap()->top_addr(), symbolic_Relocation::eden_top_reference);
1282   } else {
1283     ldr(top_addr, Address(Rthread, JavaThread::heap_top_addr_offset()));
1284   }
1285   // Calculate new heap_top by adding the size of the object
1286   Label retry;
1287   bind(retry);
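  // CAS loop: re-read the current heap top, compute the new top, and retry if
  // another thread has bumped the top pointer in the meantime.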
1288 
1289 #ifdef AARCH64
1290   ldxr(obj, top_addr);
1291 #else
1292   ldr(obj, Address(top_addr));
1293 #endif // AARCH64
1294 
1295   ldr(heap_end, Address(top_addr, (intptr_t)ch->end_addr() - (intptr_t)ch->top_addr()));
1296   add_rc(obj_end, obj, size_expression);
1297   // Check if obj_end wrapped around, i.e., obj_end < obj. If yes, jump to the slow case.
1298   cmp(obj_end, obj);
1299   b(slow_case, lo);
1300   // Update heap_top if allocation succeeded
1301   cmp(obj_end, heap_end);
1302   b(slow_case, hi);
1303 
1304 #ifdef AARCH64
1305   stxr(heap_end/*scratched*/, obj_end, top_addr);
1306   cbnz_w(heap_end, retry);
1307 #else
1308   atomic_cas_bool(obj, obj_end, top_addr, 0, heap_end/*scratched*/);
1309   b(retry, ne);
1310 #endif // AARCH64
1311 }
1312 
1313 // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
1314 void MacroAssembler::tlab_allocate(Register obj, Register obj_end, Register tmp1,
1315                                  RegisterOrConstant size_expression, Label& slow_case) {
1316   const Register tlab_end = tmp1;
1317   assert_different_registers(obj, obj_end, tlab_end);
1318 
1319   ldr(obj, Address(Rthread, JavaThread::tlab_top_offset()));
1320   ldr(tlab_end, Address(Rthread, JavaThread::tlab_end_offset()));
1321   add_rc(obj_end, obj, size_expression);
1322   cmp(obj_end, tlab_end);
1323   b(slow_case, hi);
1324   str(obj_end, Address(Rthread, JavaThread::tlab_top_offset()));
1325 }
1326 
1327 // Fills the memory region [start, end) with zeroes. Clobbers the `start` and `tmp` registers.
1328 void MacroAssembler::zero_memory(Register start, Register end, Register tmp) {
1329   Label loop;
1330   const Register ptr = start;
1331 
1332 #ifdef AARCH64
1333   // TODO-AARCH64 - compare performance of 2x word zeroing with simple 1x
1334   const Register size = tmp;
1335   Label remaining, done;
1336 
1337   sub(size, end, start);
1338 
1339 #ifdef ASSERT
1340   { Label L;
1341     tst(size, wordSize - 1);
1342     b(L, eq);
1343     stop("size is not a multiple of wordSize");
1344     bind(L);
1345   }
1346 #endif // ASSERT
1347 
1348   subs(size, size, wordSize);
1349   b(remaining, le);
1350 
1351   // Zero by 2 words per iteration.
1352   bind(loop);
1353   subs(size, size, 2*wordSize);
1354   stp(ZR, ZR, Address(ptr, 2*wordSize, post_indexed));
1355   b(loop, gt);
1356 
1357   bind(remaining);
1358   b(done, ne);
1359   str(ZR, Address(ptr));
1360   bind(done);
1361 #else
1362   mov(tmp, 0);
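  // Store a zero word and post-increment while ptr < end; the 'lo' condition
  // guards both the store and the back-branch, so nothing is written when
  // start == end.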
1363   bind(loop);
1364   cmp(ptr, end);
1365   str(tmp, Address(ptr, wordSize, post_indexed), lo);
1366   b(loop, lo);
1367 #endif // AARCH64
1368 }
1369 
1370 void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register tmp) {
1371 #ifdef AARCH64
1372   ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
1373   add_rc(tmp, tmp, size_in_bytes);
1374   str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
1375 #else
1376   // Bump total bytes allocated by this thread
1377   Label done;
1378 
1379   // Borrow the Rthread for alloc counter
1380   Register Ralloc = Rthread;
1381   add(Ralloc, Ralloc, in_bytes(JavaThread::allocated_bytes_offset()));
1382   ldr(tmp, Address(Ralloc));
1383   adds(tmp, tmp, size_in_bytes);
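  // 'cc' (carry clear) means the 32-bit addition did not overflow, so updating
  // only the low word of the 64-bit counter is sufficient.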
1384   str(tmp, Address(Ralloc), cc);
1385   b(done, cc);
1386 
1387   // Increment the high word and store single-copy atomically (an unlikely scenario on typical embedded systems, as it means more than 4GB has been allocated).
1388   // To do so, ldrd/strd instructions are used, which require an even-odd pair of registers. Such a request could be difficult to satisfy when
1389   // allocating those registers at a higher level, therefore the routine is ready to allocate a pair itself.
1390   Register low, high;
1391   // Select either R0/R1 or R2/R3
1392 
1393   if (size_in_bytes.is_register() && (size_in_bytes.as_register() == R0 || size_in_bytes.as_register() == R1)) {
1394     low = R2;
1395     high  = R3;
1396   } else {
1397     low = R0;
1398     high  = R1;
1399   }
1400   push(RegisterSet(low, high));
1401 
1402   ldrd(low, Address(Ralloc));
1403   adds(low, low, size_in_bytes);
1404   adc(high, high, 0);
1405   strd(low, Address(Ralloc));
1406 
1407   pop(RegisterSet(low, high));
1408 
1409   bind(done);
1410 
1411   // Unborrow the Rthread
1412   sub(Rthread, Ralloc, in_bytes(JavaThread::allocated_bytes_offset()));
1413 #endif // AARCH64
1414 }
1415 
1416 void MacroAssembler::arm_stack_overflow_check(int frame_size_in_bytes, Register tmp) {
1417   // Version of AbstractAssembler::generate_stack_overflow_check optimized for ARM
1418   if (UseStackBanging) {
1419     const int page_size = os::vm_page_size();
1420 
1421     sub_slow(tmp, SP, JavaThread::stack_shadow_zone_size());
1422     strb(R0, Address(tmp));
1423 #ifdef AARCH64
1424     for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= page_size) {
1425       sub(tmp, tmp, page_size);
1426       strb(R0, Address(tmp));
1427     }
1428 #else
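    // Step by 0xff0 rather than a full page: the value fits the 12-bit
    // immediate offset and is slightly smaller than the usual 4KB page, so
    // every page of the new frame is still touched at least once.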
1429     for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= 0xff0) {
1430       strb(R0, Address(tmp, -0xff0, pre_indexed));
1431     }
1432 #endif // AARCH64
1433   }
1434 }
1435 
1436 void MacroAssembler::arm_stack_overflow_check(Register Rsize, Register tmp) {
1437   if (UseStackBanging) {
1438     Label loop;
1439 
1440     mov(tmp, SP);
1441     add_slow(Rsize, Rsize, JavaThread::stack_shadow_zone_size() - os::vm_page_size());
1442 #ifdef AARCH64
1443     sub(tmp, tmp, Rsize);
1444     bind(loop);
1445     subs(Rsize, Rsize, os::vm_page_size());
1446     strb(ZR, Address(tmp, Rsize));
1447 #else
1448     bind(loop);
1449     subs(Rsize, Rsize, 0xff0);
1450     strb(R0, Address(tmp, -0xff0, pre_indexed));
1451 #endif // AARCH64
1452     b(loop, hi);
1453   }
1454 }
1455 
1456 void MacroAssembler::stop(const char* msg) {
1457   // This code pattern is matched in NativeInstruction::is_stop.
1458   // Update it at modifications.
1459 #ifdef COMPILER1
1460   if (CommentedAssembly) {
1461     block_comment("stop");
1462   }
1463 #endif
1464 
1465   InlinedAddress Ldebug(CAST_FROM_FN_PTR(address, MacroAssembler::debug));
1466   InlinedString Lmsg(msg);
1467 
1468   // save all registers for further inspection
1469   save_all_registers();
1470 
1471   ldr_literal(R0, Lmsg);                     // message
1472   mov(R1, SP);                               // register save area
1473 
1474 #ifdef AARCH64
1475   ldr_literal(Rtemp, Ldebug);
1476   br(Rtemp);
1477 #else
1478   ldr_literal(PC, Ldebug);                   // call MacroAssembler::debug
1479 #endif // AARCH64
1480 
1481 #if defined(COMPILER2) && defined(AARCH64)
1482   int off = offset();
1483 #endif
1484   bind_literal(Lmsg);
1485   bind_literal(Ldebug);
1486 #if defined(COMPILER2) && defined(AARCH64)
1487   if (offset() - off == 2 * wordSize) {
1488     // no padding, so insert nop for worst-case sizing
1489     nop();
1490   }
1491 #endif
1492 }
1493 
1494 void MacroAssembler::warn(const char* msg) {
1495 #ifdef COMPILER1
1496   if (CommentedAssembly) {
1497     block_comment("warn");
1498   }
1499 #endif
1500 
1501   InlinedAddress Lwarn(CAST_FROM_FN_PTR(address, warning));
1502   InlinedString Lmsg(msg);
1503   Label done;
1504 
1505   int push_size = save_caller_save_registers();
1506 
1507 #ifdef AARCH64
1508   // TODO-AARCH64 - get rid of extra debug parameters
1509   mov(R1, LR);
1510   mov(R2, FP);
1511   add(R3, SP, push_size);
1512 #endif
1513 
1514   ldr_literal(R0, Lmsg);                    // message
1515   ldr_literal(LR, Lwarn);                   // call warning
1516 
1517   call(LR);
1518 
1519   restore_caller_save_registers();
1520 
1521   b(done);
1522   bind_literal(Lmsg);
1523   bind_literal(Lwarn);
1524   bind(done);
1525 }
1526 
1527 
1528 int MacroAssembler::save_all_registers() {
1529   // This code pattern is matched in NativeInstruction::is_save_all_registers.
1530   // Keep it in sync when this code is modified.
1531 #ifdef AARCH64
1532   const Register tmp = Rtemp;
1533   raw_push(R30, ZR);
1534   for (int i = 28; i >= 0; i -= 2) {
1535       raw_push(as_Register(i), as_Register(i+1));
1536   }
1537   mov_pc_to(tmp);
1538   str(tmp, Address(SP, 31*wordSize));
1539   ldr(tmp, Address(SP, tmp->encoding()*wordSize));
1540   return 32*wordSize;
1541 #else
1542   push(RegisterSet(R0, R12) | RegisterSet(LR) | RegisterSet(PC));
1543   return 15*wordSize;
1544 #endif // AARCH64
1545 }
1546 
1547 void MacroAssembler::restore_all_registers() {
1548 #ifdef AARCH64
1549   for (int i = 0; i <= 28; i += 2) {
1550     raw_pop(as_Register(i), as_Register(i+1));
1551   }
1552   raw_pop(R30, ZR);
1553 #else
1554   pop(RegisterSet(R0, R12) | RegisterSet(LR));   // restore registers
1555   add(SP, SP, wordSize);                         // discard saved PC
1556 #endif // AARCH64
1557 }
1558 
1559 int MacroAssembler::save_caller_save_registers() {
1560 #ifdef AARCH64
1561   for (int i = 0; i <= 16; i += 2) {
1562     raw_push(as_Register(i), as_Register(i+1));
1563   }
1564   raw_push(R18, LR);
1565   return 20*wordSize;
1566 #else
1567 #if R9_IS_SCRATCHED
1568   // Save also R10 to preserve alignment
1569   push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10));
1570   return 8*wordSize;
1571 #else
1572   push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
1573   return 6*wordSize;
1574 #endif
1575 #endif // AARCH64
1576 }
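// Sketch of the size math above (illustrative, assuming 32-bit ARM with
// wordSize == 4): the AAPCS keeps SP 8-byte aligned, so an even number of
// 4-byte registers is pushed. Without R9 scratched that is {R0-R3, R12, LR},
// 6 registers = 24 bytes; with R9 scratched, R10 is pushed as well purely to
// keep the count even: {R0-R3, R9, R10, R12, LR}, 8 registers = 32 bytes.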
1577 
1578 void MacroAssembler::restore_caller_save_registers() {
1579 #ifdef AARCH64
1580   raw_pop(R18, LR);
1581   for (int i = 16; i >= 0; i -= 2) {
1582     raw_pop(as_Register(i), as_Register(i+1));
1583   }
1584 #else
1585 #if R9_IS_SCRATCHED
1586   pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10));
1587 #else
1588   pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
1589 #endif
1590 #endif // AARCH64
1591 }
1592 
1593 void MacroAssembler::debug(const char* msg, const intx* registers) {
1594   // In order to get locks to work, we need to fake an in_VM state
1595   JavaThread* thread = JavaThread::current();
1596   thread->set_thread_state(_thread_in_vm);
1597 
1598   if (ShowMessageBoxOnError) {
1599     ttyLocker ttyl;
1600     if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
1601       BytecodeCounter::print();
1602     }
1603     if (os::message_box(msg, "Execution stopped, print registers?")) {
1604 #ifdef AARCH64
1605       // saved registers: R0-R30, PC
1606       const int nregs = 32;
1607 #else
1608       // saved registers: R0-R12, LR, PC
1609       const int nregs = 15;
1610       const Register regs[nregs] = {R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, PC};
1611 #endif // AARCH64
1612 
1613       for (int i = 0; i < nregs AARCH64_ONLY(-1); i++) {
1614         tty->print_cr("%s = " INTPTR_FORMAT, AARCH64_ONLY(as_Register(i)) NOT_AARCH64(regs[i])->name(), registers[i]);
1615       }
1616 
1617 #ifdef AARCH64
1618       tty->print_cr("pc = " INTPTR_FORMAT, registers[nregs-1]);
1619 #endif // AARCH64
1620 
1621       // derive original SP value from the address of register save area
1622       tty->print_cr("%s = " INTPTR_FORMAT, SP->name(), p2i(&registers[nregs]));
1623     }
1624     BREAKPOINT;
1625   } else {
1626     ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
1627   }
1628   assert(false, "DEBUG MESSAGE: %s", msg);
1629   fatal("%s", msg); // returning from MacroAssembler::debug is not supported
1630 }
1631 
1632 void MacroAssembler::unimplemented(const char* what) {
1633   const char* buf = NULL;
1634   {
1635     ResourceMark rm;
1636     stringStream ss;
1637     ss.print("unimplemented: %s", what);
1638     buf = code_string(ss.as_string());
1639   }
1640   stop(buf);
1641 }
1642 
1643 
1644 // Implementation of FixedSizeCodeBlock
1645 
1646 FixedSizeCodeBlock::FixedSizeCodeBlock(MacroAssembler* masm, int size_in_instrs, bool enabled) :
1647 _masm(masm), _start(masm->pc()), _size_in_instrs(size_in_instrs), _enabled(enabled) {
1648 }
1649 
1650 FixedSizeCodeBlock::~FixedSizeCodeBlock() {
1651   if (_enabled) {
1652     address curr_pc = _masm->pc();
1653 
1654     assert(_start < curr_pc, "invalid current pc");
1655     guarantee(curr_pc <= _start + _size_in_instrs * Assembler::InstructionSize, "code block is too long");
1656 
1657     int nops_count = (_start - curr_pc) / Assembler::InstructionSize + _size_in_instrs;
1658     for (int i = 0; i < nops_count; i++) {
1659       _masm->nop();
1660     }
1661   }
1662 }
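// Illustrative example (not in the original source): a block declared as
//   FixedSizeCodeBlock guard(this, 4, true);
// that ends up emitting only 3 instructions is padded in the destructor with
//   nops_count = (_start - curr_pc) / InstructionSize + _size_in_instrs = -3 + 4 = 1
// nop, so the block always occupies exactly 4 * Assembler::InstructionSize
// bytes and can later be patched without changing the code size.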
1663 
1664 #ifdef AARCH64
1665 
1666 // Serializes memory.
1667 // The tmp register is not used on AArch64; the parameter exists solely for compatibility with the 32-bit ARM variant.
1668 void MacroAssembler::membar(Membar_mask_bits order_constraint, Register tmp) {
1669   if (!os::is_MP()) return;
1670 
1671   // TODO-AARCH64 investigate dsb vs dmb effects
1672   if (order_constraint == StoreStore) {
1673     dmb(DMB_st);
1674   } else if ((order_constraint & ~(LoadLoad | LoadStore)) == 0) {
1675     dmb(DMB_ld);
1676   } else {
1677     dmb(DMB_all);
1678   }
1679 }
1680 
1681 #else
1682 
1683 // Serializes memory. Potentially blows flags and reg.
1684 // tmp is a scratch register for the ARMv6 co-processor write op (can be noreg for other architecture versions)
1685 // preserve_flags takes a longer path in the LoadStore case (a dmb rather than a control dependency) to preserve the status flags. Optional.
1686 // load_tgt is an ordered load target, used in the LoadStore case only to create a dependency between the load and the conditional branch. Optional.
1687 void MacroAssembler::membar(Membar_mask_bits order_constraint,
1688                             Register tmp,
1689                             bool preserve_flags,
1690                             Register load_tgt) {
1691   if (!os::is_MP()) return;
1692 
1693   if (order_constraint == StoreStore) {
1694     dmb(DMB_st, tmp);
1695   } else if ((order_constraint & StoreLoad)  ||
1696              (order_constraint & LoadLoad)   ||
1697              (order_constraint & StoreStore) ||
1698              (load_tgt == noreg)             ||
1699              preserve_flags) {
1700     dmb(DMB_all, tmp);
1701   } else {
1702     // LoadStore: reordering of speculative stores is prohibited
1703 
1704     // By providing an ordered load target register, we avoid an extra memory load reference
1705     Label not_taken;
1706     bind(not_taken);
1707     cmp(load_tgt, load_tgt);
1708     b(not_taken, ne);
1709   }
1710 }
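// Worked example for the 32-bit membar above (illustrative; Rdest is a
// hypothetical register holding the value just loaded): a request such as
//   membar(MacroAssembler::LoadStore, Rtemp, false /*preserve_flags*/, Rdest);
// takes the cheap path: the cmp of Rdest against itself plus the never-taken
// conditional branch forms a control dependency from the load, which on ARM is
// enough to keep later stores from being speculated ahead of it. Any mask
// containing StoreLoad, LoadLoad or StoreStore, or a call without a load_tgt,
// falls back to a dmb.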
1711 
1712 #endif // AARCH64
1713 
1714 // If "allow_fallthrough_on_failure" is false, we always branch to "slow_case"
1715 // on failure, so fall-through can only mean success.
1716 // "one_shot" controls whether we loop and retry to mitigate spurious failures.
1717 // This is only needed for C2, which for some reason does not retry,
1718 // while C1/interpreter does.
1719 // TODO: measure if it makes a difference
1720 
1721 void MacroAssembler::cas_for_lock_acquire(Register oldval, Register newval,
1722   Register base, Register tmp, Label &slow_case,
1723   bool allow_fallthrough_on_failure, bool one_shot)
1724 {
1725 
1726   bool fallthrough_is_success = false;
1727 
1728   // ARM Litmus Test example does prefetching here.
1729   // TODO: investigate if it helps performance
1730 
1731   // The last store was to the displaced header, so to prevent
1732   // reordering we must issue a StoreStore or Release barrier before
1733   // the CAS store.
1734 
1735 #ifdef AARCH64
1736 
1737   Register Rscratch = tmp;
1738   Register Roop = base;
1739   Register mark = oldval;
1740   Register Rbox = newval;
1741   Label loop;
1742 
1743   assert(oopDesc::mark_offset_in_bytes() == 0, "must be");
1744 
1745   // Instead of StoreStore here, we use store-release-exclusive below
1746 
1747   bind(loop);
1748 
1749   ldaxr(tmp, base);  // acquire
1750   cmp(tmp, oldval);
1751   b(slow_case, ne);
1752   stlxr(tmp, newval, base); // release
1753   if (one_shot) {
1754     cmp_w(tmp, 0);
1755   } else {
1756     cbnz_w(tmp, loop);
1757     fallthrough_is_success = true;
1758   }
1759 
1760   // MemBarAcquireLock would normally go here, but
1761   // we already do ldaxr+stlxr above, which has
1762   // Sequential Consistency
1763 
1764 #else
1765   membar(MacroAssembler::StoreStore, noreg);
1766 
1767   if (one_shot) {
1768     ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
1769     cmp(tmp, oldval);
1770     strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
1771     cmp(tmp, 0, eq);
1772   } else {
1773     atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
1774   }
1775 
1776   // MemBarAcquireLock barrier
1777   // According to JSR-133 Cookbook, this should be LoadLoad | LoadStore,
1778   // but that doesn't prevent a load or store from floating up between
1779   // the load and store in the CAS sequence, so play it safe and
1780   // do a full fence.
1781   membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), noreg);
1782 #endif
1783   if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
1784     b(slow_case, ne);
1785   }
1786 }
1787 
1788 void MacroAssembler::cas_for_lock_release(Register oldval, Register newval,
1789   Register base, Register tmp, Label &slow_case,
1790   bool allow_fallthrough_on_failure, bool one_shot)
1791 {
1792 
1793   bool fallthrough_is_success = false;
1794 
1795   assert_different_registers(oldval,newval,base,tmp);
1796 
1797 #ifdef AARCH64
1798   Label loop;
1799 
1800   assert(oopDesc::mark_offset_in_bytes() == 0, "must be");
1801 
1802   bind(loop);
1803   ldxr(tmp, base);
1804   cmp(tmp, oldval);
1805   b(slow_case, ne);
1806   // MemBarReleaseLock barrier
1807   stlxr(tmp, newval, base);
1808   if (one_shot) {
1809     cmp_w(tmp, 0);
1810   } else {
1811     cbnz_w(tmp, loop);
1812     fallthrough_is_success = true;
1813   }
1814 #else
1815   // MemBarReleaseLock barrier
1816   // According to JSR-133 Cookbook, this should be StoreStore | LoadStore,
1817   // but that doesn't prevent a load or store from floating down between
1818   // the load and store in the CAS sequence, so play it safe and
1819   // do a full fence.
1820   membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), tmp);
1821 
1822   if (one_shot) {
1823     ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
1824     cmp(tmp, oldval);
1825     strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
1826     cmp(tmp, 0, eq);
1827   } else {
1828     atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
1829   }
1830 #endif
1831   if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
1832     b(slow_case, ne);
1833   }
1834 
1835   // ExitEnter
1836   // According to JSR-133 Cookbook, this should be StoreLoad, the same
1837   // barrier that follows volatile store.
1838   // TODO: Should be able to remove on armv8 if volatile loads
1839   // use the load-acquire instruction.
1840   membar(StoreLoad, noreg);
1841 }
1842 
1843 #ifndef PRODUCT
1844 
1845 // Preserves flags and all registers.
1846 // On SMP the updated value might not be visible to external observers without a synchronization barrier
1847 void MacroAssembler::cond_atomic_inc32(AsmCondition cond, int* counter_addr) {
1848   if (counter_addr != NULL) {
1849     InlinedAddress counter_addr_literal((address)counter_addr);
1850     Label done, retry;
1851     if (cond != al) {
1852       b(done, inverse(cond));
1853     }
1854 
1855 #ifdef AARCH64
1856     raw_push(R0, R1);
1857     raw_push(R2, ZR);
1858 
1859     ldr_literal(R0, counter_addr_literal);
1860 
1861     bind(retry);
1862     ldxr_w(R1, R0);
1863     add_w(R1, R1, 1);
1864     stxr_w(R2, R1, R0);
1865     cbnz_w(R2, retry);
1866 
1867     raw_pop(R2, ZR);
1868     raw_pop(R0, R1);
1869 #else
1870     push(RegisterSet(R0, R3) | RegisterSet(Rtemp));
1871     ldr_literal(R0, counter_addr_literal);
1872 
1873     mrs(CPSR, Rtemp);
1874 
1875     bind(retry);
1876     ldr_s32(R1, Address(R0));
1877     add(R2, R1, 1);
1878     atomic_cas_bool(R1, R2, R0, 0, R3);
1879     b(retry, ne);
1880 
1881     msr(CPSR_fsxc, Rtemp);
1882 
1883     pop(RegisterSet(R0, R3) | RegisterSet(Rtemp));
1884 #endif // AARCH64
1885 
1886     b(done);
1887     bind_literal(counter_addr_literal);
1888 
1889     bind(done);
1890   }
1891 }
1892 
1893 #endif // !PRODUCT
1894 
1895 
1896 // Building block for CAS cases of biased locking: makes CAS and records statistics.
1897 // The slow_case label is used to transfer control if CAS fails. Otherwise leaves condition codes set.
1898 void MacroAssembler::biased_locking_enter_with_cas(Register obj_reg, Register old_mark_reg, Register new_mark_reg,
1899                                                  Register tmp, Label& slow_case, int* counter_addr) {
1900 
1901   cas_for_lock_acquire(old_mark_reg, new_mark_reg, obj_reg, tmp, slow_case);
1902 #ifdef ASSERT
1903   breakpoint(ne); // Fallthrough only on success
1904 #endif
1905 #ifndef PRODUCT
1906   if (counter_addr != NULL) {
1907     cond_atomic_inc32(al, counter_addr);
1908   }
1909 #endif // !PRODUCT
1910 }
1911 
1912 int MacroAssembler::biased_locking_enter(Register obj_reg, Register swap_reg, Register tmp_reg,
1913                                          bool swap_reg_contains_mark,
1914                                          Register tmp2,
1915                                          Label& done, Label& slow_case,
1916                                          BiasedLockingCounters* counters) {
1917   // obj_reg must be preserved (at least) if the bias locking fails
1918   // tmp_reg is a temporary register
1919   // swap_reg was used as a temporary but contained a value
1920   //   that was used afterwards in some call paths. Callers
1921   //   have been fixed so that swap_reg no longer needs to be
1922   //   saved.
1923   // Rtemp is no longer scratched
1924 
1925   assert(UseBiasedLocking, "why call this otherwise?");
1926   assert_different_registers(obj_reg, swap_reg, tmp_reg, tmp2);
1927   guarantee(swap_reg!=tmp_reg, "invariant");
1928   assert(tmp_reg != noreg, "must supply tmp_reg");
1929 
1930 #ifndef PRODUCT
1931   if (PrintBiasedLockingStatistics && (counters == NULL)) {
1932     counters = BiasedLocking::counters();
1933   }
1934 #endif
1935 
1936   assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
1937   Address mark_addr(obj_reg, oopDesc::mark_offset_in_bytes());
1938 
1939   // Biased locking
1940   // See whether the lock is currently biased toward our thread and
1941   // whether the epoch is still valid
1942   // Note that the runtime guarantees sufficient alignment of JavaThread
1943   // pointers to allow age to be placed into low bits
1944   // First check to see whether biasing is even enabled for this object
1945   Label cas_label;
1946 
1947   // The null check applies to the mark load, if we need to load it.
1948   // If the mark has already been loaded into swap_reg then the null check
1949   // has already been performed and the offset is irrelevant.
1950   int null_check_offset = offset();
1951   if (!swap_reg_contains_mark) {
1952     ldr(swap_reg, mark_addr);
1953   }
1954 
1955   // On MP platform loads could return 'stale' values in some cases.
1956   // That is acceptable since either CAS or slow case path is taken in the worst case.
1957 
1958   andr(tmp_reg, swap_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
1959   cmp(tmp_reg, markOopDesc::biased_lock_pattern);
1960 
1961   b(cas_label, ne);
1962 
1963   // The bias pattern is present in the object's header. Need to check
1964   // whether the bias owner and the epoch are both still current.
1965   load_klass(tmp_reg, obj_reg);
1966   ldr(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
1967   orr(tmp_reg, tmp_reg, Rthread);
1968   eor(tmp_reg, tmp_reg, swap_reg);
1969 
1970 #ifdef AARCH64
1971   ands(tmp_reg, tmp_reg, ~((uintx) markOopDesc::age_mask_in_place));
1972 #else
1973   bics(tmp_reg, tmp_reg, ((int) markOopDesc::age_mask_in_place));
1974 #endif // AARCH64
1975 
1976 #ifndef PRODUCT
1977   if (counters != NULL) {
1978     cond_atomic_inc32(eq, counters->biased_lock_entry_count_addr());
1979   }
1980 #endif // !PRODUCT
1981 
1982   b(done, eq);
1983 
1984   Label try_revoke_bias;
1985   Label try_rebias;
1986 
1987   // At this point we know that the header has the bias pattern and
1988   // that we are not the bias owner in the current epoch. We need to
1989   // figure out more details about the state of the header in order to
1990   // know what operations can be legally performed on the object's
1991   // header.
1992 
1993   // If the low three bits in the xor result aren't clear, that means
1994   // the prototype header is no longer biased and we have to revoke
1995   // the bias on this object.
1996   tst(tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
1997   b(try_revoke_bias, ne);
1998 
1999   // Biasing is still enabled for this data type. See whether the
2000   // epoch of the current bias is still valid, meaning that the epoch
2001   // bits of the mark word are equal to the epoch bits of the
2002   // prototype header. (Note that the prototype header's epoch bits
2003   // only change at a safepoint.) If not, attempt to rebias the object
2004   // toward the current thread. Note that we must be absolutely sure
2005   // that the current epoch is invalid in order to do this because
2006   // otherwise the manipulations it performs on the mark word are
2007   // illegal.
2008   tst(tmp_reg, (uintx)markOopDesc::epoch_mask_in_place);
2009   b(try_rebias, ne);
2010 
2011   // tmp_reg has the age, epoch and pattern bits cleared
2012   // The remaining (owner) bits are (Thread ^ current_owner)
2013 
2014   // The epoch of the current bias is still valid but we know nothing
2015   // about the owner; it might be set or it might be clear. Try to
2016   // acquire the bias of the object using an atomic operation. If this
2017   // fails we will go in to the runtime to revoke the object's bias.
2018   // Note that we first construct the presumed unbiased header so we
2019   // don't accidentally blow away another thread's valid bias.
2020 
2021   // Note that we know the owner is not ourself. Hence, success can
2022   // only happen when the owner bits are 0
2023 
2024 #ifdef AARCH64
2025   // The bit mask biased_lock + age + epoch is not a valid AArch64 logical immediate, as it has
2026   // a cleared bit in the middle (the CMS bit), so it is loaded with a separate instruction.
2027   mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
2028   andr(swap_reg, swap_reg, tmp2);
2029 #else
2030   // until the assembler can be made smarter, we need to make some assumptions about the values
2031   // so we can optimize this:
2032   assert((markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place) == 0x1ff, "biased bitmasks changed");
2033 
2034   mov(swap_reg, AsmOperand(swap_reg, lsl, 23));
2035   mov(swap_reg, AsmOperand(swap_reg, lsr, 23)); // markOop with thread bits cleared (for CAS)
2036 #endif // AARCH64
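  // Illustrative note on the 32-bit shift idiom above (not from the original
  // comments): biased_lock (3 bits including the lock bits) + age (4) + epoch (2)
  // occupy the low 9 bits of the mark word (0x1ff, as asserted), while the
  // owner/thread bits live above them. Shifting left and then right by 23 on a
  // 32-bit word keeps only those low 9 bits, e.g. 0x12345e05 -> 0x005, which is
  // exactly the "markOop with thread bits cleared" needed as the CAS compare value.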
2037 
2038   orr(tmp_reg, swap_reg, Rthread); // new mark
2039 
2040   biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case,
2041         (counters != NULL) ? counters->anonymously_biased_lock_entry_count_addr() : NULL);
2042 
2043   // If the biasing toward our thread failed, this means that
2044   // another thread succeeded in biasing it toward itself and we
2045   // need to revoke that bias. The revocation will occur in the
2046   // interpreter runtime in the slow case.
2047 
2048   b(done);
2049 
2050   bind(try_rebias);
2051 
2052   // At this point we know the epoch has expired, meaning that the
2053   // current "bias owner", if any, is actually invalid. Under these
2054   // circumstances _only_, we are allowed to use the current header's
2055   // value as the comparison value when doing the cas to acquire the
2056   // bias in the current epoch. In other words, we allow transfer of
2057   // the bias from one thread to another directly in this situation.
2058 
2059   // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg)
2060 
2061   eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !)
2062 
2063   // owner bits 'random'. Set them to Rthread.
2064 #ifdef AARCH64
2065   mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
2066   andr(tmp_reg, tmp_reg, tmp2);
2067 #else
2068   mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23));
2069   mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23));
2070 #endif // AARCH64
2071 
2072   orr(tmp_reg, tmp_reg, Rthread); // new mark
2073 
2074   biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case,
2075         (counters != NULL) ? counters->rebiased_lock_entry_count_addr() : NULL);
2076 
2077   // If the biasing toward our thread failed, then another thread
2078   // succeeded in biasing it toward itself and we need to revoke that
2079   // bias. The revocation will occur in the runtime in the slow case.
2080 
2081   b(done);
2082 
2083   bind(try_revoke_bias);
2084 
2085   // The prototype mark in the klass doesn't have the bias bit set any
2086   // more, indicating that objects of this data type are not supposed
2087   // to be biased any more. We are going to try to reset the mark of
2088   // this object to the prototype value and fall through to the
2089   // CAS-based locking scheme. Note that if our CAS fails, it means
2090   // that another thread raced us for the privilege of revoking the
2091   // bias of this particular object, so it's okay to continue in the
2092   // normal locking code.
2093 
2094   // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg)
2095 
2096   eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !)
2097 
2098   // owner bits 'random'. Clear them
2099 #ifdef AARCH64
2100   mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
2101   andr(tmp_reg, tmp_reg, tmp2);
2102 #else
2103   mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23));
2104   mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23));
2105 #endif // AARCH64
2106 
2107   biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, cas_label,
2108         (counters != NULL) ? counters->revoked_lock_entry_count_addr() : NULL);
2109 
2110   // Fall through to the normal CAS-based lock, because no matter what
2111   // the result of the above CAS, some thread must have succeeded in
2112   // removing the bias bit from the object's header.
2113 
2114   bind(cas_label);
2115 
2116   return null_check_offset;
2117 }
2118 
2119 
2120 void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done) {
2121   assert(UseBiasedLocking, "why call this otherwise?");
2122 
2123   // Check for biased locking unlock case, which is a no-op
2124   // Note: we do not have to check the thread ID for two reasons.
2125   // First, the interpreter checks for IllegalMonitorStateException at
2126   // a higher level. Second, if the bias was revoked while we held the
2127   // lock, the object could not be rebiased toward another thread, so
2128   // the bias bit would be clear.
2129   ldr(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
2130 
2131   andr(tmp_reg, tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
2132   cmp(tmp_reg, markOopDesc::biased_lock_pattern);
2133   b(done, eq);
2134 }
2135 
2136 
2137 void MacroAssembler::resolve_jobject(Register value,
2138                                      Register tmp1,
2139                                      Register tmp2) {
2140   assert_different_registers(value, tmp1, tmp2);
2141   Label done, not_weak;
2142   cbz(value, done);             // Use NULL as-is.
2143   STATIC_ASSERT(JNIHandles::weak_tag_mask == 1u);
2144   tbz(value, 0, not_weak);      // Test for jweak tag.
2145 
2146   // Resolve jweak.
2147   access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF,
2148                  Address(value, -JNIHandles::weak_tag_value), value, tmp1, tmp2, noreg);
2149   b(done);
2150   bind(not_weak);
2151   // Resolve (untagged) jobject.
2152   access_load_at(T_OBJECT, IN_NATIVE,
2153                  Address(value, 0), value, tmp1, tmp2, noreg);
2154   verify_oop(value);
2155   bind(done);
2156 }
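// Illustrative note (hypothetical handle value, not from the original source):
// JNI weak handles are distinguished by the low tag bit, so a handle such as
// 0x7f001234 | 1 is a jweak. tbz tests that bit; the weak path subtracts
// JNIHandles::weak_tag_value to strip the tag before the ON_PHANTOM_OOP_REF
// load, while the strong path loads straight through the untagged pointer.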
2157 
2158 
2159 //////////////////////////////////////////////////////////////////////////////////
2160 
2161 #ifdef AARCH64
2162 
2163 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) {
2164   switch (size_in_bytes) {
2165     case  8: ldr(dst, src); break;
2166     case  4: is_signed ? ldr_s32(dst, src) : ldr_u32(dst, src); break;
2167     case  2: is_signed ? ldrsh(dst, src) : ldrh(dst, src); break;
2168     case  1: is_signed ? ldrsb(dst, src) : ldrb(dst, src); break;
2169     default: ShouldNotReachHere();
2170   }
2171 }
2172 
2173 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) {
2174   switch (size_in_bytes) {
2175     case  8: str(src, dst);    break;
2176     case  4: str_32(src, dst); break;
2177     case  2: strh(src, dst);   break;
2178     case  1: strb(src, dst);   break;
2179     default: ShouldNotReachHere();
2180   }
2181 }
2182 
2183 #else
2184 
2185 void MacroAssembler::load_sized_value(Register dst, Address src,
2186                                     size_t size_in_bytes, bool is_signed, AsmCondition cond) {
2187   switch (size_in_bytes) {
2188     case  4: ldr(dst, src, cond); break;
2189     case  2: is_signed ? ldrsh(dst, src, cond) : ldrh(dst, src, cond); break;
2190     case  1: is_signed ? ldrsb(dst, src, cond) : ldrb(dst, src, cond); break;
2191     default: ShouldNotReachHere();
2192   }
2193 }
2194 
2195 
2196 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes, AsmCondition cond) {
2197   switch (size_in_bytes) {
2198     case  4: str(src, dst, cond); break;
2199     case  2: strh(src, dst, cond);   break;
2200     case  1: strb(src, dst, cond);   break;
2201     default: ShouldNotReachHere();
2202   }
2203 }
2204 #endif // AARCH64
2205 
2206 // Look up the method for a megamorphic invokeinterface call.
2207 // The target method is determined by <Rinterf, Rindex>.
2208 // The receiver klass is in Rklass.
2209 // On success, the result will be in method_result, and execution falls through.
2210 // On failure, execution transfers to the given label.
2211 void MacroAssembler::lookup_interface_method(Register Rklass,
2212                                              Register Rintf,
2213                                              RegisterOrConstant itable_index,
2214                                              Register method_result,
2215                                              Register Rscan,
2216                                              Register Rtmp,
2217                                              Label& L_no_such_interface) {
2218 
2219   assert_different_registers(Rklass, Rintf, Rscan, Rtmp);
2220 
2221   const int entry_size = itableOffsetEntry::size() * HeapWordSize;
2222   assert(itableOffsetEntry::interface_offset_in_bytes() == 0, "not added for convenience");
2223 
2224   // Compute start of first itableOffsetEntry (which is at the end of the vtable)
2225   const int base = in_bytes(Klass::vtable_start_offset());
2226   const int scale = exact_log2(vtableEntry::size_in_bytes());
2227   ldr_s32(Rtmp, Address(Rklass, Klass::vtable_length_offset())); // Get length of vtable
2228   add(Rscan, Rklass, base);
2229   add(Rscan, Rscan, AsmOperand(Rtmp, lsl, scale));
2230 
2231   // Search through the itable for an interface equal to incoming Rintf
2232   // itable looks like [intface][offset][intface][offset][intface][offset]
2233 
2234   Label loop;
2235   bind(loop);
2236   ldr(Rtmp, Address(Rscan, entry_size, post_indexed));
2237 #ifdef AARCH64
2238   Label found;
2239   cmp(Rtmp, Rintf);
2240   b(found, eq);
2241   cbnz(Rtmp, loop);
2242 #else
2243   cmp(Rtmp, Rintf);  // set ZF and CF if interface is found
2244   cmn(Rtmp, 0, ne);  // check if tmp == 0 and clear CF if it is
2245   b(loop, ne);
2246 #endif // AARCH64
2247 
2248 #ifdef AARCH64
2249   b(L_no_such_interface);
2250   bind(found);
2251 #else
2252   // CF == 0 means we reached the end of itable without finding icklass
2253   b(L_no_such_interface, cc);
2254 #endif // !AARCH64
2255 
2256   if (method_result != noreg) {
2257     // Interface found at previous position of Rscan, now load the method
2258     ldr_s32(Rtmp, Address(Rscan, itableOffsetEntry::offset_offset_in_bytes() - entry_size));
2259     if (itable_index.is_register()) {
2260       add(Rtmp, Rtmp, Rklass); // Add offset to Klass*
2261       assert(itableMethodEntry::size() * HeapWordSize == wordSize, "adjust the scaling in the code below");
2262       assert(itableMethodEntry::method_offset_in_bytes() == 0, "adjust the offset in the code below");
2263       ldr(method_result, Address::indexed_ptr(Rtmp, itable_index.as_register()));
2264     } else {
2265       int method_offset = itableMethodEntry::size() * HeapWordSize * itable_index.as_constant() +
2266                           itableMethodEntry::method_offset_in_bytes();
2267       add_slow(method_result, Rklass, method_offset);
2268       ldr(method_result, Address(method_result, Rtmp));
2269     }
2270   }
2271 }
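// Illustrative layout note (not from the original comments): the itable scanned
// by the loop above sits directly after the vtable and looks like
//   [interface0, offset0][interface1, offset1] ... [NULL, ...]
// Because the ldr post-increments Rscan by entry_size on every iteration, Rscan
// already points one entry past the match when the loop exits, which is why the
// offset is reloaded at (Rscan - entry_size) before indexing the method entries.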
2272 
2273 #ifdef COMPILER2
2274 // TODO: 8 bytes at a time? pre-fetch?
2275 // Compare char[] arrays aligned to 4 bytes.
2276 void MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
2277                                         Register limit, Register result,
2278                                       Register chr1, Register chr2, Label& Ldone) {
2279   Label Lvector, Lloop;
2280 
2281   // Note: limit contains number of bytes (2*char_elements) != 0.
2282   tst(limit, 0x2); // trailing character ?
2283   b(Lvector, eq);
2284 
2285   // compare the trailing char
2286   sub(limit, limit, sizeof(jchar));
2287   ldrh(chr1, Address(ary1, limit));
2288   ldrh(chr2, Address(ary2, limit));
2289   cmp(chr1, chr2);
2290   mov(result, 0, ne);     // not equal
2291   b(Ldone, ne);
2292 
2293   // only one char ?
2294   tst(limit, limit);
2295   mov(result, 1, eq);
2296   b(Ldone, eq);
2297 
2298   // word-by-word compare, don't need an alignment check
2299   bind(Lvector);
2300 
2301   // Shift ary1 and ary2 to the end of the arrays, negate limit
2302   add(ary1, limit, ary1);
2303   add(ary2, limit, ary2);
2304   neg(limit, limit);
2305 
2306   bind(Lloop);
2307   ldr_u32(chr1, Address(ary1, limit));
2308   ldr_u32(chr2, Address(ary2, limit));
2309   cmp_32(chr1, chr2);
2310   mov(result, 0, ne);     // not equal
2311   b(Ldone, ne);
2312   adds(limit, limit, 2*sizeof(jchar));
2313   b(Lloop, ne);
2314 
2315   // Caller should set it:
2316   // mov(result_reg, 1);  //equal
2317 }
2318 #endif
2319 
2320 void MacroAssembler::inc_counter(address counter_addr, Register tmpreg1, Register tmpreg2) {
2321   mov_slow(tmpreg1, counter_addr);
2322   ldr_s32(tmpreg2, tmpreg1);
2323   add_32(tmpreg2, tmpreg2, 1);
2324   str_32(tmpreg2, tmpreg1);
2325 }
2326 
2327 void MacroAssembler::floating_cmp(Register dst) {
2328 #ifdef AARCH64
2329   NOT_TESTED();
2330   cset(dst, gt);            // 1 if '>', else 0
2331   csinv(dst, dst, ZR, ge);  // previous value if '>=', else -1
2332 #else
2333   vmrs(dst, FPSCR);
2334   orr(dst, dst, 0x08000000);
2335   eor(dst, dst, AsmOperand(dst, lsl, 3));
2336   mov(dst, AsmOperand(dst, asr, 30));
2337 #endif
2338 }
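// Illustrative trace of the 32-bit FPSCR trick above (not from the original
// comments): after a VFP compare, N,Z,C,V sit in FPSCR bits 31..28. The orr
// sets bit 27, and (dst ^ (dst << 3)) therefore leaves bit31 = N^V and
// bit30 = ~Z; asr #30 then sign-extends those two bits:
//   less      (N=1,Z=0,V=0) -> 0b11... -> -1
//   equal     (N=0,Z=1,V=0) -> 0b00... ->  0
//   greater   (N=0,Z=0,V=0) -> 0b01... -> +1
//   unordered (V=1)         ->           -1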
2339 
2340 void MacroAssembler::restore_default_fp_mode() {
2341 #ifdef AARCH64
2342   msr(SysReg_FPCR, ZR);
2343 #else
2344 #ifndef __SOFTFP__
2345   // Round to Near mode, IEEE compatible, masked exceptions
2346   mov(Rtemp, 0);
2347   vmsr(FPSCR, Rtemp);
2348 #endif // !__SOFTFP__
2349 #endif // AARCH64
2350 }
2351 
2352 #ifndef AARCH64
2353 // 24-bit word range == 26-bit byte range
2354 bool check26(int offset) {
2355   // this could be simplified, but it mimics encoding and decoding
2356   // an actual branch instruction
2357   int off1 = offset << 6 >> 8;
2358   int encoded = off1 & ((1<<24)-1);
2359   int decoded = encoded << 8 >> 6;
2360   return offset == decoded;
2361 }
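// Worked example (illustrative): the check above accepts a byte offset only if
// it is a multiple of 4 and fits the signed 26-bit range of a B/BL encoding.
// offset = 0x01fffffc round-trips (<<6 gives 0x7fffff00, >>8 gives 0x007fffff,
// re-expanding yields 0x01fffffc again), whereas offset = 0x02000000 decodes
// back to -0x02000000 and is rejected, as is any offset that is not word aligned.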
2362 #endif // !AARCH64
2363 
2364 // Perform some slight adjustments so the default 32MB code cache
2365 // is fully reachable.
2366 static inline address first_cache_address() {
2367   return CodeCache::low_bound() + sizeof(HeapBlock::Header);
2368 }
2369 static inline address last_cache_address() {
2370   return CodeCache::high_bound() - Assembler::InstructionSize;
2371 }
2372 
2373 #ifdef AARCH64
2374 // Can we reach target using ADRP?
2375 bool MacroAssembler::page_reachable_from_cache(address target) {
2376   intptr_t cl = (intptr_t)first_cache_address() & ~0xfff;
2377   intptr_t ch = (intptr_t)last_cache_address() & ~0xfff;
2378   intptr_t addr = (intptr_t)target & ~0xfff;
2379 
2380   intptr_t loffset = addr - cl;
2381   intptr_t hoffset = addr - ch;
2382   return is_imm_in_range(loffset >> 12, 21, 0) && is_imm_in_range(hoffset >> 12, 21, 0);
2383 }
2384 #endif
2385 
2386 // Can we reach target using unconditional branch or call from anywhere
2387 // in the code cache (because code can be relocated)?
2388 bool MacroAssembler::_reachable_from_cache(address target) {
2389 #ifdef __thumb__
2390   if ((1 & (intptr_t)target) != 0) {
2391     // Return false to avoid 'b' if we need switching to THUMB mode.
2392     return false;
2393   }
2394 #endif
2395 
2396   address cl = first_cache_address();
2397   address ch = last_cache_address();
2398 
2399   if (ForceUnreachable) {
2400     // Only addresses from CodeCache can be treated as reachable.
2401     if (target < CodeCache::low_bound() || CodeCache::high_bound() < target) {
2402       return false;
2403     }
2404   }
2405 
2406   intptr_t loffset = (intptr_t)target - (intptr_t)cl;
2407   intptr_t hoffset = (intptr_t)target - (intptr_t)ch;
2408 
2409 #ifdef AARCH64
2410   return is_offset_in_range(loffset, 26) && is_offset_in_range(hoffset, 26);
2411 #else
2412   return check26(loffset - 8) && check26(hoffset - 8);
2413 #endif
2414 }
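// Illustrative note (not from the original comments): on 32-bit ARM a branch
// displacement is encoded relative to PC+8, so the "- 8" above models the worst
// case of a branch sitting at the very first / very last possible instruction of
// the code cache. For example, a b from address A to target T encodes the word
// offset (T - (A + 8)) >> 2, which is exactly what check26 validates.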
2415 
2416 bool MacroAssembler::reachable_from_cache(address target) {
2417   assert(CodeCache::contains(pc()), "not supported");
2418   return _reachable_from_cache(target);
2419 }
2420 
2421 // Can we reach the entire code cache from anywhere else in the code cache?
2422 bool MacroAssembler::_cache_fully_reachable() {
2423   address cl = first_cache_address();
2424   address ch = last_cache_address();
2425   return _reachable_from_cache(cl) && _reachable_from_cache(ch);
2426 }
2427 
2428 bool MacroAssembler::cache_fully_reachable() {
2429   assert(CodeCache::contains(pc()), "not supported");
2430   return _cache_fully_reachable();
2431 }
2432 
2433 void MacroAssembler::jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) {
2434   assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
2435   if (reachable_from_cache(target)) {
2436     relocate(rtype);
2437     b(target NOT_AARCH64_ARG(cond));
2438     return;
2439   }
2440 
2441   // Note: relocate is not needed for the code below,
2442   // encoding targets in absolute format.
2443   if (ignore_non_patchable_relocations()) {
2444     rtype = relocInfo::none;
2445   }
2446 
2447 #ifdef AARCH64
2448   assert (scratch != noreg, "should be specified");
2449   InlinedAddress address_literal(target, rtype);
2450   ldr_literal(scratch, address_literal);
2451   br(scratch);
2452   int off = offset();
2453   bind_literal(address_literal);
2454 #ifdef COMPILER2
2455   if (offset() - off == wordSize) {
2456     // no padding, so insert nop for worst-case sizing
2457     nop();
2458   }
2459 #endif
2460 #else
2461   if (VM_Version::supports_movw() && (scratch != noreg) && (rtype == relocInfo::none)) {
2462     // Note: this version cannot be (atomically) patched
2463     mov_slow(scratch, (intptr_t)target, cond);
2464     bx(scratch, cond);
2465   } else {
2466     Label skip;
2467     InlinedAddress address_literal(target);
2468     if (cond != al) {
2469       b(skip, inverse(cond));
2470     }
2471     relocate(rtype);
2472     ldr_literal(PC, address_literal);
2473     bind_literal(address_literal);
2474     bind(skip);
2475   }
2476 #endif // AARCH64
2477 }
2478 
2479 // Similar to jump except that:
2480 // - near calls are valid only if any destination in the cache is near
2481 // - no movt/movw (not atomically patchable)
2482 void MacroAssembler::patchable_jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) {
2483   assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
2484   if (cache_fully_reachable()) {
2485     // Note: this assumes that all possible targets (the initial one
2486     // and the addresses patched to) are all in the code cache.
2487     assert(CodeCache::contains(target), "target might be too far");
2488     relocate(rtype);
2489     b(target NOT_AARCH64_ARG(cond));
2490     return;
2491   }
2492 
2493   // Discard the relocation information if not needed for CacheCompiledCode
2494   // since the next encodings are all in absolute format.
2495   if (ignore_non_patchable_relocations()) {
2496     rtype = relocInfo::none;
2497   }
2498 
2499 #ifdef AARCH64
2500   assert (scratch != noreg, "should be specified");
2501   InlinedAddress address_literal(target);
2502   relocate(rtype);
2503   ldr_literal(scratch, address_literal);
2504   br(scratch);
2505   int off = offset();
2506   bind_literal(address_literal);
2507 #ifdef COMPILER2
2508   if (offset() - off == wordSize) {
2509     // no padding, so insert nop for worst-case sizing
2510     nop();
2511   }
2512 #endif
2513 #else
2514   {
2515     Label skip;
2516     InlinedAddress address_literal(target);
2517     if (cond != al) {
2518       b(skip, inverse(cond));
2519     }
2520     relocate(rtype);
2521     ldr_literal(PC, address_literal);
2522     bind_literal(address_literal);
2523     bind(skip);
2524   }
2525 #endif // AARCH64
2526 }
2527 
2528 void MacroAssembler::call(address target, RelocationHolder rspec NOT_AARCH64_ARG(AsmCondition cond)) {
2529   Register scratch = LR;
2530   assert(rspec.type() == relocInfo::runtime_call_type || rspec.type() == relocInfo::none, "not supported");
2531   if (reachable_from_cache(target)) {
2532     relocate(rspec);
2533     bl(target NOT_AARCH64_ARG(cond));
2534     return;
2535   }
2536 
2537   // Note: relocate is not needed for the code below,
2538   // encoding targets in absolute format.
2539   if (ignore_non_patchable_relocations()) {
2540     // This assumes the information was needed only for relocating the code.
2541     rspec = RelocationHolder::none;
2542   }
2543 
2544 #ifndef AARCH64
2545   if (VM_Version::supports_movw() && (rspec.type() == relocInfo::none)) {
2546     // Note: this version cannot be (atomically) patched
2547     mov_slow(scratch, (intptr_t)target, cond);
2548     blx(scratch, cond);
2549     return;
2550   }
2551 #endif
2552 
2553   {
2554     Label ret_addr;
2555 #ifndef AARCH64
2556     if (cond != al) {
2557       b(ret_addr, inverse(cond));
2558     }
2559 #endif
2560 
2561 
2562 #ifdef AARCH64
2563     // TODO-AARCH64: make more optimal implementation
2564     // [ Keep in sync with MacroAssembler::call_size ]
2565     assert(rspec.type() == relocInfo::none, "call reloc not implemented");
2566     mov_slow(scratch, target);
2567     blr(scratch);
2568 #else
2569     InlinedAddress address_literal(target);
2570     relocate(rspec);
2571     adr(LR, ret_addr);
2572     ldr_literal(PC, address_literal);
2573 
2574     bind_literal(address_literal);
2575     bind(ret_addr);
2576 #endif
2577   }
2578 }
2579 
2580 #if defined(AARCH64) && defined(COMPILER2)
2581 int MacroAssembler::call_size(address target, bool far, bool patchable) {
2582   // FIXME: mov_slow is variable-length
2583   if (!far) return 1; // bl
2584   if (patchable) return 2;  // ldr; blr
2585   return instr_count_for_mov_slow((intptr_t)target) + 1;
2586 }
2587 #endif
2588 
2589 int MacroAssembler::patchable_call(address target, RelocationHolder const& rspec, bool c2) {
2590   assert(rspec.type() == relocInfo::static_call_type ||
2591          rspec.type() == relocInfo::none ||
2592          rspec.type() == relocInfo::opt_virtual_call_type, "not supported");
2593 
2594   // Always generate the relocation information, needed for patching
2595   relocate(rspec); // used by NativeCall::is_call_before()
2596   if (cache_fully_reachable()) {
2597     // Note: this assumes that all possible targets (the initial one
2598     // and the addresses patched to) are all in the code cache.
2599     assert(CodeCache::contains(target), "target might be too far");
2600     bl(target);
2601   } else {
2602 #if defined(AARCH64) && defined(COMPILER2)
2603     if (c2) {
2604       // return address needs to match call_size().
2605       // no need to trash Rtemp
2606       int off = offset();
2607       Label skip_literal;
2608       InlinedAddress address_literal(target);
2609       ldr_literal(LR, address_literal);
2610       blr(LR);
2611       int ret_addr_offset = offset();
2612       assert(offset() - off == call_size(target, true, true) * InstructionSize, "need to fix call_size()");
2613       b(skip_literal);
2614       int off2 = offset();
2615       bind_literal(address_literal);
2616       if (offset() - off2 == wordSize) {
2617         // no padding, so insert nop for worst-case sizing
2618         nop();
2619       }
2620       bind(skip_literal);
2621       return ret_addr_offset;
2622     }
2623 #endif
2624     Label ret_addr;
2625     InlinedAddress address_literal(target);
2626 #ifdef AARCH64
2627     ldr_literal(Rtemp, address_literal);
2628     adr(LR, ret_addr);
2629     br(Rtemp);
2630 #else
2631     adr(LR, ret_addr);
2632     ldr_literal(PC, address_literal);
2633 #endif
2634     bind_literal(address_literal);
2635     bind(ret_addr);
2636   }
2637   return offset();
2638 }
2639 
2640 // ((OopHandle)result).resolve();
2641 void MacroAssembler::resolve_oop_handle(Register result) {
2642   // OopHandle::resolve is an indirection.
2643   ldr(result, Address(result, 0));
2644 }
2645 
2646 void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) {
2647   const int mirror_offset = in_bytes(Klass::java_mirror_offset());
2648   ldr(tmp, Address(method, Method::const_offset()));
2649   ldr(tmp, Address(tmp,  ConstMethod::constants_offset()));
2650   ldr(tmp, Address(tmp, ConstantPool::pool_holder_offset_in_bytes()));
2651   ldr(mirror, Address(tmp, mirror_offset));
2652   resolve_oop_handle(mirror);
2653 }
2654 
2655 
2656 ///////////////////////////////////////////////////////////////////////////////
2657 
2658 // Compressed pointers
2659 
2660 #ifdef AARCH64
2661 
2662 void MacroAssembler::load_klass(Register dst_klass, Register src_oop) {
2663   if (UseCompressedClassPointers) {
2664     ldr_w(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()));
2665     decode_klass_not_null(dst_klass);
2666   } else {
2667     ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()));
2668   }
2669 }
2670 
2671 #else
2672 
2673 void MacroAssembler::load_klass(Register dst_klass, Register src_oop, AsmCondition cond) {
2674   ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()), cond);
2675 }
2676 
2677 #endif // AARCH64
2678 
2679 // Blows src_klass.
2680 void MacroAssembler::store_klass(Register src_klass, Register dst_oop) {
2681 #ifdef AARCH64
2682   if (UseCompressedClassPointers) {
2683     assert(src_klass != dst_oop, "not enough registers");
2684     encode_klass_not_null(src_klass);
2685     str_w(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
2686     return;
2687   }
2688 #endif // AARCH64
2689   str(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
2690 }
2691 
2692 #ifdef AARCH64
2693 
2694 void MacroAssembler::store_klass_gap(Register dst) {
2695   if (UseCompressedClassPointers) {
2696     str_w(ZR, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
2697   }
2698 }
2699 
2700 #endif // AARCH64
2701 
2702 
2703 void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, Register tmp2, Register tmp3, DecoratorSet decorators) {
2704   access_load_at(T_OBJECT, IN_HEAP | decorators, src, dst, tmp1, tmp2, tmp3);
2705 }
2706 
2707 // Blows src and flags.
2708 void MacroAssembler::store_heap_oop(Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, DecoratorSet decorators) {
2709   access_store_at(T_OBJECT, IN_HEAP | decorators, obj, new_val, tmp1, tmp2, tmp3, false);
2710 }
2711 
2712 void MacroAssembler::store_heap_oop_null(Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, DecoratorSet decorators) {
2713   access_store_at(T_OBJECT, IN_HEAP, obj, new_val, tmp1, tmp2, tmp3, true);
2714 }
2715 
2716 void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators,
2717                                     Address src, Register dst, Register tmp1, Register tmp2, Register tmp3) {
2718   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
2719   decorators = AccessInternal::decorator_fixup(decorators);
2720   bool as_raw = (decorators & AS_RAW) != 0;
2721   if (as_raw) {
2722     bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3);
2723   } else {
2724     bs->load_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3);
2725   }
2726 }
2727 
2728 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators,
2729                                      Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, bool is_null) {
2730   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
2731   decorators = AccessInternal::decorator_fixup(decorators);
2732   bool as_raw = (decorators & AS_RAW) != 0;
2733   if (as_raw) {
2734     bs->BarrierSetAssembler::store_at(this, decorators, type, obj, new_val, tmp1, tmp2, tmp3, is_null);
2735   } else {
2736     bs->store_at(this, decorators, type, obj, new_val, tmp1, tmp2, tmp3, is_null);
2737   }
2738 }
2739 
2740 
2741 #ifdef AARCH64
2742 
2743 // Algorithm must match oop.inline.hpp encode_heap_oop.
2744 void MacroAssembler::encode_heap_oop(Register dst, Register src) {
2745   // This code pattern is matched in NativeInstruction::skip_encode_heap_oop.
2746   // Keep it in sync when this code is modified.
2747   assert (UseCompressedOops, "must be compressed");
2748   assert (Universe::heap() != NULL, "java heap should be initialized");
2749 #ifdef ASSERT
2750   verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
2751 #endif
2752   verify_oop(src);
2753   if (Universe::narrow_oop_base() == NULL) {
2754     if (Universe::narrow_oop_shift() != 0) {
2755       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2756       _lsr(dst, src, Universe::narrow_oop_shift());
2757     } else if (dst != src) {
2758       mov(dst, src);
2759     }
2760   } else {
2761     tst(src, src);
2762     csel(dst, Rheap_base, src, eq);
2763     sub(dst, dst, Rheap_base);
2764     if (Universe::narrow_oop_shift() != 0) {
2765       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2766       _lsr(dst, dst, Universe::narrow_oop_shift());
2767     }
2768   }
2769 }
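// Worked example (illustrative values, assuming a 3-bit shift): with
// Universe::narrow_oop_base() == (address)0x800000000 and an oop at
// 0x800000040, the heap-based path above computes
// (0x800000040 - 0x800000000) >> 3 = 0x8 as the narrow oop; a NULL src is first
// replaced by Rheap_base via the csel, so the subtraction yields 0 and NULL
// stays encoded as 0. The zero-based path degenerates to a plain shift (or a
// move when the shift is 0).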
2770 
2771 // Same algorithm as oop.inline.hpp decode_heap_oop.
2772 void MacroAssembler::decode_heap_oop(Register dst, Register src) {
2773 #ifdef ASSERT
2774   verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
2775 #endif
2776   assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2777   if (Universe::narrow_oop_base() != NULL) {
2778     tst(src, src);
2779     add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift()));
2780     csel(dst, dst, ZR, ne);
2781   } else {
2782     _lsl(dst, src, Universe::narrow_oop_shift());
2783   }
2784   verify_oop(dst);
2785 }
2786 
2787 #ifdef COMPILER2
2788 // Algorithm must match oop.inline.hpp encode_heap_oop.
2789 // Must preserve condition codes, or C2 encodeHeapOop_not_null rule
2790 // must be changed.
2791 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
2792   assert (UseCompressedOops, "must be compressed");
2793   assert (Universe::heap() != NULL, "java heap should be initialized");
2794 #ifdef ASSERT
2795   verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
2796 #endif
2797   verify_oop(src);
2798   if (Universe::narrow_oop_base() == NULL) {
2799     if (Universe::narrow_oop_shift() != 0) {
2800       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2801       _lsr(dst, src, Universe::narrow_oop_shift());
2802     } else if (dst != src) {
2803       mov(dst, src);
2804     }
2805   } else {
2806     sub(dst, src, Rheap_base);
2807     if (Universe::narrow_oop_shift() != 0) {
2808       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2809       _lsr(dst, dst, Universe::narrow_oop_shift());
2810     }
2811   }
2812 }
2813 
2814 // Same algorithm as oops.inline.hpp decode_heap_oop.
2815 // Must preserve condition codes, or C2 decodeHeapOop_not_null rule
2816 // must be changed.
2817 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
2818 #ifdef ASSERT
2819   verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
2820 #endif
2821   assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2822   if (Universe::narrow_oop_base() != NULL) {
2823     add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift()));
2824   } else {
2825     _lsl(dst, src, Universe::narrow_oop_shift());
2826   }
2827   verify_oop(dst);
2828 }
2829 
2830 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
2831   assert(UseCompressedClassPointers, "should only be used for compressed header");
2832   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
2833   int klass_index = oop_recorder()->find_index(k);
2834   RelocationHolder rspec = metadata_Relocation::spec(klass_index);
2835 
2836   // Relocation with special format (see relocInfo_arm.hpp).
2837   relocate(rspec);
2838   narrowKlass encoded_k = Klass::encode_klass(k);
2839   movz(dst, encoded_k & 0xffff, 0);
2840   movk(dst, (encoded_k >> 16) & 0xffff, 16);
2841 }
2842 
2843 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
2844   assert(UseCompressedOops, "should only be used for compressed header");
2845   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
2846   int oop_index = oop_recorder()->find_index(obj);
2847   RelocationHolder rspec = oop_Relocation::spec(oop_index);
2848 
2849   relocate(rspec);
2850   movz(dst, 0xffff, 0);
2851   movk(dst, 0xffff, 16);
2852 }
2853 
2854 #endif // COMPILER2
2855 // Must preserve condition codes, or C2 encodeKlass_not_null rule
2856 // must be changed.
2857 void MacroAssembler::encode_klass_not_null(Register r) {
2858   if (Universe::narrow_klass_base() != NULL) {
2859     // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base.
2860     assert(r != Rheap_base, "Encoding a klass in Rheap_base");
    mov_slow(Rheap_base, Universe::narrow_klass_base());
    sub(r, r, Rheap_base);
  }
  if (Universe::narrow_klass_shift() != 0) {
    assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "encode alg wrong");
    _lsr(r, r, Universe::narrow_klass_shift());
  }
  if (Universe::narrow_klass_base() != NULL) {
    reinit_heapbase();
  }
}

// Must preserve condition codes, or C2 encodeKlass_not_null rule
// must be changed.
void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
  if (dst == src) {
    encode_klass_not_null(src);
    return;
  }
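  // With a klass base, dst = (src - narrow_klass_base) >> narrow_klass_shift,
  // using dst itself as the scratch so that Rheap_base is left untouched.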
  if (Universe::narrow_klass_base() != NULL) {
    mov_slow(dst, (int64_t)Universe::narrow_klass_base());
    sub(dst, src, dst);
    if (Universe::narrow_klass_shift() != 0) {
      assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "encode alg wrong");
      _lsr(dst, dst, Universe::narrow_klass_shift());
    }
  } else {
    if (Universe::narrow_klass_shift() != 0) {
      assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "encode alg wrong");
      _lsr(dst, src, Universe::narrow_klass_shift());
    } else {
      mov(dst, src);
    }
  }
}

// Function instr_count_for_decode_klass_not_null() counts the instructions
// generated by decode_klass_not_null(register r) and reinit_heapbase(),
// when (Universe::heap() != NULL).  Hence, if the instructions they
// generate change, then this method needs to be updated.
int MacroAssembler::instr_count_for_decode_klass_not_null() {
  assert(UseCompressedClassPointers, "only for compressed klass ptrs");
  assert(Universe::heap() != NULL, "java heap should be initialized");
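  // Keep in sync with decode_klass_not_null(Register): with a klass base the
  // sequence is mov_slow + add + reinit_heapbase (another mov_slow); otherwise
  // it is a single shift, or nothing at all when the shift is zero.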
  if (Universe::narrow_klass_base() != NULL) {
    return instr_count_for_mov_slow(Universe::narrow_klass_base()) + // mov_slow
           1 +                                                       // add
           instr_count_for_mov_slow(Universe::narrow_ptrs_base());   // reinit_heapbase() = mov_slow
  } else {
    if (Universe::narrow_klass_shift() != 0) {
      return 1;
    }
  }
  return 0;
}

// Must preserve condition codes, or C2 decodeKlass_not_null rule
// must be changed.
void MacroAssembler::decode_klass_not_null(Register r) {
  int off = offset();
  assert(UseCompressedClassPointers, "should only be used for compressed headers");
  assert(Universe::heap() != NULL, "java heap should be initialized");
  assert(r != Rheap_base, "Decoding a klass in Rheap_base");
  // Do not emit extra code here (e.g. for verification):
  // instr_count_for_decode_klass_not_null() counts the instructions generated below.
  // Also do not verify_oop as this is called by verify_oop.
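  // The decoding is klass = narrow_klass_base + (narrow_klass << narrow_klass_shift);
  // when a base is used, Rheap_base serves as the temporary and is restored below.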
  if (Universe::narrow_klass_base() != NULL) {
    // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base.
    mov_slow(Rheap_base, Universe::narrow_klass_base());
    add(r, Rheap_base, AsmOperand(r, lsl, Universe::narrow_klass_shift()));
    reinit_heapbase();
  } else {
    if (Universe::narrow_klass_shift() != 0) {
      assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
      _lsl(r, r, Universe::narrow_klass_shift());
    }
  }
  assert((offset() - off) == (instr_count_for_decode_klass_not_null() * InstructionSize), "need to fix instr_count_for_decode_klass_not_null");
}

// Must preserve condition codes, or C2 decodeKlass_not_null rule
// must be changed.
void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
  if (src == dst) {
    decode_klass_not_null(src);
    return;
  }

  assert(UseCompressedClassPointers, "should only be used for compressed headers");
  assert(Universe::heap() != NULL, "java heap should be initialized");
  assert(src != Rheap_base, "Decoding a klass in Rheap_base");
  assert(dst != Rheap_base, "Decoding a klass into Rheap_base");
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_klass_base() != NULL) {
    mov_slow(dst, Universe::narrow_klass_base());
    add(dst, dst, AsmOperand(src, lsl, Universe::narrow_klass_shift()));
  } else {
    _lsl(dst, src, Universe::narrow_klass_shift());
  }
}

void MacroAssembler::reinit_heapbase() {
  if (UseCompressedOops || UseCompressedClassPointers) {
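    // Before the heap is initialized the base is not yet a stable constant, so
    // it is loaded indirectly from narrow_ptrs_base_addr() instead.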
    if (Universe::heap() != NULL) {
      mov_slow(Rheap_base, Universe::narrow_ptrs_base());
    } else {
      ldr_global_ptr(Rheap_base, (address)Universe::narrow_ptrs_base_addr());
    }
  }
}

#ifdef ASSERT
void MacroAssembler::verify_heapbase(const char* msg) {
  // This code pattern is matched in NativeInstruction::skip_verify_heapbase.
  // Update it when modifying this code.
  assert (UseCompressedOops, "should be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  if (CheckCompressedOops) {
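    // Rtemp and the NZCV flags are saved and restored around the check; callers
    // must not observe any register or flag changes, only the stop() on a mismatch.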
    Label ok;
    str(Rthread, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset())));
    raw_push(Rtemp, ZR);
    mrs(Rtemp, Assembler::SysReg_NZCV);
    str(Rtemp, Address(SP, 1 * wordSize));
    mov_slow(Rtemp, Universe::narrow_ptrs_base());
    cmp(Rheap_base, Rtemp);
    b(ok, eq);
    stop(msg);
    bind(ok);
    ldr(Rtemp, Address(SP, 1 * wordSize));
    msr(Assembler::SysReg_NZCV, Rtemp);
    raw_pop(Rtemp, ZR);
    str(ZR, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset())));
  }
}
#endif // ASSERT

#endif // AARCH64

#ifdef COMPILER2
void MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2, Register scratch3)
{
  assert(VM_Version::supports_ldrex(), "unsupported, yet?");

  Register Rmark      = Rscratch2;

  assert(Roop != Rscratch, "");
  assert(Roop != Rmark, "");
  assert(Rbox != Rscratch, "");
  assert(Rbox != Rmark, "");

  Label fast_lock, done;

  if (UseBiasedLocking && !UseOptoBiasInlining) {
    assert(scratch3 != noreg, "need extra temporary for -XX:-UseOptoBiasInlining");
    biased_locking_enter(Roop, Rmark, Rscratch, false, scratch3, done, done);
    // Fall through if lock not biased, otherwise branch to done
  }

  // Invariant: Rmark loaded below does not contain biased lock pattern

  ldr(Rmark, Address(Roop, oopDesc::mark_offset_in_bytes()));
  tst(Rmark, markOopDesc::unlocked_value);
  b(fast_lock, ne);

  // Check for recursive lock
  // See comments in InterpreterMacroAssembler::lock_object for
  // explanations on the fast recursive locking check.
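  // The lock is treated as recursive if the displaced header points into this
  // thread's stack, approximated by checking that (mark - SP) is a small
  // positive offset (within one page) and that the low lock bits are clear.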
#ifdef AARCH64
  intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size());
  Assembler::LogicalImmediate imm(mask, false);
  mov(Rscratch, SP);
  sub(Rscratch, Rmark, Rscratch);
  ands(Rscratch, Rscratch, imm);
  // set to zero if recursive lock, set to non zero otherwise (see discussion in JDK-8153107)
  str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
  b(done);

#else
  // -1- test low 2 bits
  movs(Rscratch, AsmOperand(Rmark, lsl, 30));
  // -2- test (hdr - SP) if the low two bits are 0
  sub(Rscratch, Rmark, SP, eq);
  movs(Rscratch, AsmOperand(Rscratch, lsr, exact_log2(os::vm_page_size())), eq);
  // If still 'eq' then recursive locking OK
  // set to zero if recursive lock, set to non zero otherwise (see discussion in JDK-8153107)
  str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
  b(done);
#endif

  bind(fast_lock);
  str(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));

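  // Attempt to install Rbox (the BasicLock address) as the new mark word; on
  // failure, fall through to done with NE set so the caller takes the slow path.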
  bool allow_fallthrough_on_failure = true;
  bool one_shot = true;
  cas_for_lock_acquire(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);

  bind(done);

  // At this point flags are set as follows:
  //  EQ -> Success
  //  NE -> Failure, branch to slow path
}

void MacroAssembler::fast_unlock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2  AARCH64_ONLY_ARG(Register Rscratch3))
{
  assert(VM_Version::supports_ldrex(), "unsupported, yet?");

  Register Rmark      = Rscratch2;

  assert(Roop != Rscratch, "");
  assert(Roop != Rmark, "");
  assert(Rbox != Rscratch, "");
  assert(Rbox != Rmark, "");

  Label done;

  if (UseBiasedLocking && !UseOptoBiasInlining) {
    biased_locking_exit(Roop, Rscratch, done);
  }

  ldr(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
  // If hdr is NULL, we've got recursive locking and there's nothing more to do
  cmp(Rmark, 0);
  b(done, eq);

  // Restore the object header
  bool allow_fallthrough_on_failure = true;
  bool one_shot = true;
  cas_for_lock_release(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);

  bind(done);
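  // As in fast_lock, flags are EQ on success and NE when the slow path is needed.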
}
#endif // COMPILER2